Setup

library(here)
here() starts at /Users/maarten/Documents/projects/gamified-feedback-study
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
library(patchwork)
Warning: package 'patchwork' was built under R version 4.3.3
library(stringr)
library(tidyr)
library(lme4)
Loading required package: Matrix

Attaching package: 'Matrix'
The following objects are masked from 'package:tidyr':

    expand, pack, unpack
library(lmerTest)

Attaching package: 'lmerTest'
The following object is masked from 'package:lme4':

    lmer
The following object is masked from 'package:stats':

    step
library(brms)
Loading required package: Rcpp
Loading 'brms' package (version 2.21.0). Useful instructions
can be found by typing help('brms'). A more detailed introduction
to the package is available through vignette('brms_overview').

Attaching package: 'brms'
The following object is masked from 'package:lme4':

    ngrps
The following object is masked from 'package:stats':

    ar
# Set up parallel processing for Bayesian models
library(future)
plan(multisession, workers = 4)

Helper functions for plots and tables:

source(here("scripts", "00_visualisation_functions.R"))
Loading required package: data.table

Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':

    between, first, last
Loading required package: flextable

Load processed data:

d_learn <- readRDS(here("data", "processed", "d_learn.rds"))
add_experiment_cols <- function (data) {
  data |>
    mutate(exp_order = case_when(
      gamified_first == 0 & exp_group == "score" ~ "Control—Score",
      gamified_first == 0 & exp_group == "both" ~ "Control—Both",
      gamified_first == 1 & exp_group == "score" ~ "Score—Control",
      gamified_first == 1 & exp_group == "both" ~ "Both—Control"
    )) |>
    mutate(type = ifelse(gamified, "Gamified", "Control"))
}
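
As a quick check of the resulting labels, we can apply the helper to the distinct design cells (an illustrative call, not part of the analysis itself):

d_learn |>
  distinct(gamified, gamified_first, exp_group) |>
  add_experiment_cols() |>
  arrange(exp_order)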

Helper function for interpreting Bayes factors:

bf_to_strength <- function (bf) {
  
  direction <- "for"
  
  if (bf < 1) {
    bf <- 1/bf
    direction <- "against"
  }
  
  strength <- case_when(
    bf == 1 ~ "No",
    bf < 3 ~ "Anecdotal",
    bf < 10 ~ "Moderate",
    bf < 30 ~ "Strong",
    bf < 100 ~ "Very strong",
    TRUE ~ "Extreme"
  )
  
  paste0(strength, " evidence ", direction)
}
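
For example (a quick sanity check with illustrative values):

sapply(c(1/50, 1/2, 1, 5, 50), bf_to_strength)
# "Very strong evidence against", "Anecdotal evidence against",
# "No evidence for", "Moderate evidence for", "Very strong evidence for"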

Does gamification change performance during practice?

Accuracy

Prepare data

d_learn_acc <- d_learn |>
  filter(!study_trial) |>
  group_by(subject, exp_group, block, condition, gamified, gamified_first) |>
  summarise(accuracy = mean(correct))
`summarise()` has grouped output by 'subject', 'exp_group', 'block',
'condition', 'gamified'. You can override using the `.groups` argument.
d_learn_acc_agg <- d_learn_acc |>
  group_by(block, condition, gamified, gamified_first, exp_group) |>
  summarise(acc = mean(accuracy, na.rm = T),
            acc_se = sd(accuracy, na.rm = T)/sqrt(n())) |>
  ungroup() |>
  add_experiment_cols()
`summarise()` has grouped output by 'block', 'condition', 'gamified',
'gamified_first'. You can override using the `.groups` argument.

Visualise data

p_learn_acc <- plot_data(d_learn_acc_agg, acc, acc_se, "Accuracy") +
  scale_y_continuous(limits = c(.725, .875), labels = scales::percent_format())

p_learn_acc

Fit frequentist model

Prepare data for modelling by mean-centering categorical predictors:

d_learn_m <- d_learn |>
  filter(!study_trial) |>
  mutate(exp_group_c = ifelse(exp_group == "score", 0, 1),
         exp_group_c = exp_group_c - mean(exp_group_c),
         gamified_first_c = gamified_first - mean(gamified_first))
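
As a quick check (illustrative only), the centered predictors should have a mean of (numerically) zero:

d_learn_m |>
  summarise(mean_exp_group = mean(exp_group_c),
            mean_gamified_first = mean(gamified_first_c))
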
m_learn_acc <- glmer(correct ~ gamified +
                       gamified:exp_group_c +
                       gamified:gamified_first_c +
                       gamified:gamified_first_c:exp_group_c +
                       (1 | subject) + (1 | fact),
                     family = "binomial",
                     data = d_learn_m)

summary(m_learn_acc)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: 
correct ~ gamified + gamified:exp_group_c + gamified:gamified_first_c +  
    gamified:gamified_first_c:exp_group_c + (1 | subject) + (1 |      fact)
   Data: d_learn_m

     AIC      BIC   logLik deviance df.resid 
 38686.5  38772.8 -19333.3  38666.5    41479 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-5.4230  0.2697  0.4166  0.5308  0.8948 

Random effects:
 Groups  Name        Variance Std.Dev.
 subject (Intercept) 0.2932   0.5415  
 fact    (Intercept) 0.1178   0.3432  
Number of obs: 41489, groups:  subject, 166; fact, 78

Fixed effects:
                                            Estimate Std. Error z value
(Intercept)                                 1.621184   0.060640  26.735
gamifiedTRUE                               -0.007472   0.025932  -0.288
gamifiedFALSE:exp_group_c                  -0.181824   0.092172  -1.973
gamifiedTRUE:exp_group_c                   -0.160723   0.092242  -1.742
gamifiedFALSE:gamified_first_c              0.069727   0.092386   0.755
gamifiedTRUE:gamified_first_c              -0.079241   0.092419  -0.857
gamifiedFALSE:exp_group_c:gamified_first_c -0.245058   0.185121  -1.324
gamifiedTRUE:exp_group_c:gamified_first_c  -0.181772   0.185123  -0.982
                                           Pr(>|z|)    
(Intercept)                                  <2e-16 ***
gamifiedTRUE                                 0.7732    
gamifiedFALSE:exp_group_c                    0.0485 *  
gamifiedTRUE:exp_group_c                     0.0814 .  
gamifiedFALSE:gamified_first_c               0.4504    
gamifiedTRUE:gamified_first_c                0.3912    
gamifiedFALSE:exp_group_c:gamified_first_c   0.1856    
gamifiedTRUE:exp_group_c:gamified_first_c    0.3261    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
              (Intr) gmTRUE gmfdFALSE:x__ gmfdTRUE:x__ gmfdFALSE:g__
gamifidTRUE   -0.215                                                
gmfdFALSE:x__ -0.014  0.017                                         
gmfdTRUE:x__  -0.008 -0.012  0.841                                  
gmfdFALSE:g__ -0.011  0.000 -0.010        -0.004                    
gmfdTRUE:g__  -0.014  0.001 -0.004        -0.009        0.842       
gFALSE:__:_   -0.008  0.011 -0.017        -0.016       -0.019       
gTRUE:__:__   -0.003 -0.009 -0.016        -0.018       -0.010       
              gmfdTRUE:g__ gFALSE:__:
gamifidTRUE                          
gmfdFALSE:x__                        
gmfdTRUE:x__                         
gmfdFALSE:g__                        
gmfdTRUE:g__                         
gFALSE:__:_   -0.010                 
gTRUE:__:__   -0.016        0.842    
print_model_table(m_learn_acc)

Effect                                                                  Estimate    SE  z-value  p-value
Intercept (Control)                                                         1.62  0.06    26.73    <.001
Gamification                                                               -0.01  0.03    -0.29     .773
Type of gamification (Points-and-progress - Points)                        -0.16  0.09    -1.74     .081
Type of gamification on Control (Points-and-progress - Points)             -0.18  0.09    -1.97     .049
Block on gamification (1 - 2)                                              -0.08  0.09    -0.86     .391
Block on Control (2 - 1)                                                    0.07  0.09     0.75     .450
Type of gamification; block difference (Block 1 - Block 2)                 -0.18  0.19    -0.98     .326
Type of gamification on Control; block difference (Block 2 - Block 1)      -0.25  0.19    -1.32     .186

Fitted values

d_model_fit <- crossing(
  gamified = FALSE, 
  exp_group_c = sort(unique(d_learn_m$exp_group_c)), 
  gamified_first_c = 0
)

d_model_fit$model_fit <- predict(m_learn_acc,
                                 newdata = d_model_fit,
                                 re.form = NA, 
                                 type = "response")

d_model_fit

Visualise fitted model

p_learn_acc_m <- plot_model_fit(m_learn_acc, d_learn_m, y_lab = "Accuracy") +
  scale_y_continuous(limits = c(.75, .90), labels = scales::percent_format(accuracy = .1))
  block           condition gamified gamified_first exp_group gamified_first_c
1     1             Control    FALSE          FALSE      both       -0.5320687
2     1             Control    FALSE          FALSE     score       -0.5320687
3     1              Points     TRUE           TRUE     score        0.4679313
4     1 Points-and-progress     TRUE           TRUE      both        0.4679313
5     2             Control    FALSE           TRUE      both        0.4679313
6     2             Control    FALSE           TRUE     score        0.4679313
7     2              Points     TRUE          FALSE     score       -0.5320687
8     2 Points-and-progress     TRUE          FALSE      both       -0.5320687
  exp_group_c  pred_val     exp_order     type
1   0.5296826 0.8258994  Control—Both  Control
2  -0.4703174 0.8331719 Control—Score  Control
3  -0.4703174 0.8445168 Score—Control Gamified
4   0.5296826 0.8094510  Both—Control Gamified
5   0.5296826 0.8170913  Both—Control  Control
6  -0.4703174 0.8573262 Score—Control  Control
7  -0.4703174 0.8436944 Control—Score Gamified
8   0.5296826 0.8350666  Control—Both Gamified
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
p_learn_acc_m

Fit Bayesian model

Fit again with brms so that we can calculate Bayes Factors. Because we expect any fixed effects to be at most moderate in size, we will use a weakly informative Normal(0, 1) prior for these effects.
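
To see which coefficients this prior will apply to, we could first inspect the model's default prior structure with get_prior() (a sketch; this call is not part of the analysis itself):

get_prior(correct ~ gamified +
            gamified:exp_group_c +
            gamified:gamified_first_c +
            gamified:gamified_first_c:exp_group_c +
            (1 | subject) + (1 | fact),
          family = "bernoulli",
          data = d_learn_m)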

m_learn_acc_brms <- brm(correct ~ gamified +
                          gamified:exp_group_c +
                          gamified:gamified_first_c +
                          gamified:gamified_first_c:exp_group_c +
                          (1 | subject) + (1 | fact),
                        family = "bernoulli",
                        data = d_learn_m,
                        prior = set_prior("normal(0, 1)", class = "b"),
                        chains = 4,
                        iter = 11000,
                        warmup = 1000,
                        sample_prior = TRUE,
                        future = TRUE,
                        seed = 0)
Compiling Stan program...
Start sampling

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.002957 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 29.57 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:     1 / 11000 [  0%]  (Warmup)
Chain 1: Iteration:  1001 / 11000 [  9%]  (Sampling)
Chain 1: Iteration:  2100 / 11000 [ 19%]  (Sampling)
Chain 1: Iteration:  3200 / 11000 [ 29%]  (Sampling)
Chain 1: Iteration:  4300 / 11000 [ 39%]  (Sampling)
Chain 1: Iteration:  5400 / 11000 [ 49%]  (Sampling)
Chain 1: Iteration:  6500 / 11000 [ 59%]  (Sampling)
Chain 1: Iteration:  7600 / 11000 [ 69%]  (Sampling)
Chain 1: Iteration:  8700 / 11000 [ 79%]  (Sampling)
Chain 1: Iteration:  9800 / 11000 [ 89%]  (Sampling)
Chain 1: Iteration: 10900 / 11000 [ 99%]  (Sampling)
Chain 1: Iteration: 11000 / 11000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 56.415 seconds (Warm-up)
Chain 1:                383.047 seconds (Sampling)
Chain 1:                439.462 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.002551 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 25.51 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:     1 / 11000 [  0%]  (Warmup)
Chain 2: Iteration:  1001 / 11000 [  9%]  (Sampling)
Chain 2: Iteration:  2100 / 11000 [ 19%]  (Sampling)
Chain 2: Iteration:  3200 / 11000 [ 29%]  (Sampling)
Chain 2: Iteration:  4300 / 11000 [ 39%]  (Sampling)
Chain 2: Iteration:  5400 / 11000 [ 49%]  (Sampling)
Chain 2: Iteration:  6500 / 11000 [ 59%]  (Sampling)
Chain 2: Iteration:  7600 / 11000 [ 69%]  (Sampling)
Chain 2: Iteration:  8700 / 11000 [ 79%]  (Sampling)
Chain 2: Iteration:  9800 / 11000 [ 89%]  (Sampling)
Chain 2: Iteration: 10900 / 11000 [ 99%]  (Sampling)
Chain 2: Iteration: 11000 / 11000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 57.533 seconds (Warm-up)
Chain 2:                374.391 seconds (Sampling)
Chain 2:                431.924 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 0.002526 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 25.26 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:     1 / 11000 [  0%]  (Warmup)
Chain 3: Iteration:  1001 / 11000 [  9%]  (Sampling)
Chain 3: Iteration:  2100 / 11000 [ 19%]  (Sampling)
Chain 3: Iteration:  3200 / 11000 [ 29%]  (Sampling)
Chain 3: Iteration:  4300 / 11000 [ 39%]  (Sampling)
Chain 3: Iteration:  5400 / 11000 [ 49%]  (Sampling)
Chain 3: Iteration:  6500 / 11000 [ 59%]  (Sampling)
Chain 3: Iteration:  7600 / 11000 [ 69%]  (Sampling)
Chain 3: Iteration:  8700 / 11000 [ 79%]  (Sampling)
Chain 3: Iteration:  9800 / 11000 [ 89%]  (Sampling)
Chain 3: Iteration: 10900 / 11000 [ 99%]  (Sampling)
Chain 3: Iteration: 11000 / 11000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 56.046 seconds (Warm-up)
Chain 3:                366.703 seconds (Sampling)
Chain 3:                422.749 seconds (Total)
Chain 3: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 0.005336 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 53.36 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:     1 / 11000 [  0%]  (Warmup)
Chain 4: Iteration:  1001 / 11000 [  9%]  (Sampling)
Chain 4: Iteration:  2100 / 11000 [ 19%]  (Sampling)
Chain 4: Iteration:  3200 / 11000 [ 29%]  (Sampling)
Chain 4: Iteration:  4300 / 11000 [ 39%]  (Sampling)
Chain 4: Iteration:  5400 / 11000 [ 49%]  (Sampling)
Chain 4: Iteration:  6500 / 11000 [ 59%]  (Sampling)
Chain 4: Iteration:  7600 / 11000 [ 69%]  (Sampling)
Chain 4: Iteration:  8700 / 11000 [ 79%]  (Sampling)
Chain 4: Iteration:  9800 / 11000 [ 89%]  (Sampling)
Chain 4: Iteration: 10900 / 11000 [ 99%]  (Sampling)
Chain 4: Iteration: 11000 / 11000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 56.192 seconds (Warm-up)
Chain 4:                541.125 seconds (Sampling)
Chain 4:                597.317 seconds (Total)
Chain 4: 
summary(m_learn_acc_brms)
 Family: bernoulli 
  Links: mu = logit 
Formula: correct ~ gamified + gamified:exp_group_c + gamified:gamified_first_c + gamified:gamified_first_c:exp_group_c + (1 | subject) + (1 | fact) 
   Data: d_learn_m (Number of observations: 41489) 
  Draws: 4 chains, each with iter = 11000; warmup = 1000; thin = 1;
         total post-warmup draws = 40000

Multilevel Hyperparameters:
~fact (Number of levels: 78) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.35      0.04     0.29     0.43 1.00     8273    14730

~subject (Number of levels: 166) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.55      0.04     0.49     0.63 1.00     7175    11801

Regression Coefficients:
                                           Estimate Est.Error l-95% CI u-95% CI
Intercept                                      1.62      0.06     1.50     1.75
gamifiedTRUE                                  -0.01      0.03    -0.06     0.04
gamifiedFALSE:exp_group_c                     -0.18      0.09    -0.36     0.00
gamifiedTRUE:exp_group_c                      -0.16      0.09    -0.34     0.02
gamifiedFALSE:gamified_first_c                 0.07      0.09    -0.11     0.25
gamifiedTRUE:gamified_first_c                 -0.08      0.09    -0.26     0.11
gamifiedFALSE:exp_group_c:gamified_first_c    -0.23      0.18    -0.60     0.12
gamifiedTRUE:exp_group_c:gamified_first_c     -0.17      0.18    -0.53     0.19
                                           Rhat Bulk_ESS Tail_ESS
Intercept                                  1.00     5005    10011
gamifiedTRUE                               1.00    43753    29667
gamifiedFALSE:exp_group_c                  1.00     5185     9292
gamifiedTRUE:exp_group_c                   1.00     5241     9538
gamifiedFALSE:gamified_first_c             1.00     5024    10205
gamifiedTRUE:gamified_first_c              1.00     4946     9580
gamifiedFALSE:exp_group_c:gamified_first_c 1.00     5514    10973
gamifiedTRUE:exp_group_c:gamified_first_c  1.00     5556    10691

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

Inspect the posterior sample distributions of the fixed effects:

plot(m_learn_acc_brms, nvariables = 8, variable = "^b_", regex = TRUE)

Bayes factors

Test the hypothesis that each fixed-effect coefficient in the model (both main effects and interactions) is equal to zero. The column Evid.Ratio shows the Bayes Factor in favour of the null hypothesis (\(BF_{01}\)).

h_learn_acc <- hypothesis(m_learn_acc_brms,
                          c("gamifiedTRUE = 0",
                            "gamifiedFALSE:exp_group_c = 0",
                            "gamifiedTRUE:exp_group_c = 0",
                            "gamifiedFALSE:gamified_first_c = 0",
                            "gamifiedTRUE:gamified_first_c = 0",
                            "gamifiedFALSE:exp_group_c:gamified_first_c = 0",
                            "gamifiedTRUE:exp_group_c:gamified_first_c = 0"),
                          class = "b")

h_learn_acc$hypothesis |>
  mutate(BF10 = 1 / Evid.Ratio,
         evidence_for_null = sapply(Evid.Ratio, bf_to_strength))

This hypothesis test calculates the Savage-Dickey density ratio at zero: the ratio of the posterior density at zero to the prior density at zero (indicated by the dashed vertical line). Values above 1 indicate a stronger belief, after observing the data, that the effect is indeed zero.

sd_ratio_acc <- plot(h_learn_acc, nvariables = 8, variable = "^b_", regex = TRUE, plot = FALSE)

sd_ratio_acc[[1]] +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey25")
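
The same ratio can be approximated by hand from the posterior draws, e.g. for the main gamification effect (a rough kernel-density sketch; hypothesis() uses a more precise estimator based on the stored prior samples from sample_prior = TRUE):

draws <- as_draws_df(m_learn_acc_brms)
post_dens <- density(draws$b_gamifiedTRUE)                 # posterior density of the coefficient
post_at_0 <- approx(post_dens$x, post_dens$y, xout = 0)$y  # posterior density at zero
prior_at_0 <- dnorm(0, mean = 0, sd = 1)                   # Normal(0, 1) prior density at zero
post_at_0 / prior_at_0                                     # approximate BF01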

Conclusion

The Bayesian model finds anecdotal to strong evidence in favour of the null hypothesis (no effect on learning accuracy) for each of the regression coefficients.

Response time

Response times are analysed for correct answers only.

Prepare data

To keep the visualisation of average response times by condition simple, we calculate the median RT per participant, and then take the mean and SD of these medians (which are themselves roughly normally distributed).

d_learn_rt <- d_learn |>
  filter(!study_trial) |>
  filter(correct) |>
  mutate(rt = rt / 1000) |>
  group_by(subject, exp_group, block, condition, gamified, gamified_first) |>
  summarise(rt_median = median(rt, na.rm = TRUE))
`summarise()` has grouped output by 'subject', 'exp_group', 'block',
'condition', 'gamified'. You can override using the `.groups` argument.
d_learn_rt_agg <- d_learn_rt |>
  group_by(block, condition, gamified, gamified_first, exp_group) |>
  summarise(rt_mean = mean(rt_median, na.rm = T),
            rt_se = sd(rt_median, na.rm = T)/sqrt(n())) |>
  ungroup() |>
  add_experiment_cols()
`summarise()` has grouped output by 'block', 'condition', 'gamified',
'gamified_first'. You can override using the `.groups` argument.

Visualise data

p_learn_rt <- plot_data(d_learn_rt_agg, rt_mean, rt_se, "Response time (s)") +
  scale_y_continuous(limits = c(1.3, 1.8), labels = scales::comma_format())

p_learn_rt

Fit frequentist model

Since RT data are not normally distributed, we fit a lognormal model to the response times by modelling the log-transformed RTs with a Gaussian mixed model (see https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#gamma-glmms). Prepare the data for modelling by log-transforming RT and mean-centering the categorical predictors:

d_learn_rt_m <- d_learn |>
  filter(!study_trial) |>
  filter(correct) |>
  mutate(log_rt = log(rt / 1000)) |>
  mutate(exp_group_c = ifelse(exp_group == "score", 0, 1),
         exp_group_c = exp_group_c - mean(exp_group_c),
         gamified_first_c = gamified_first - mean(gamified_first)
         )
m_learn_rt <- lmer(log_rt ~ gamified +
                      gamified:exp_group_c +
                      gamified:gamified_first_c +
                      gamified:gamified_first_c:exp_group_c +
                      (1 | subject) + (1 | fact),
                    data = d_learn_rt_m)

summary(m_learn_rt)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
log_rt ~ gamified + gamified:exp_group_c + gamified:gamified_first_c +  
    gamified:gamified_first_c:exp_group_c + (1 | subject) + (1 |      fact)
   Data: d_learn_rt_m

REML criterion at convergence: 58081.7

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-15.6337  -0.5696  -0.1470   0.4864   4.8537 

Random effects:
 Groups   Name        Variance Std.Dev.
 subject  (Intercept) 0.03589  0.1895  
 fact     (Intercept) 0.01190  0.1091  
 Residual             0.32134  0.5669  
Number of obs: 33657, groups:  subject, 166; fact, 78

Fixed effects:
                                             Estimate Std. Error         df
(Intercept)                                 4.837e-01  1.973e-02  2.287e+02
gamifiedTRUE                                1.734e-02  6.240e-03  3.347e+04
gamifiedFALSE:exp_group_c                   9.712e-03  3.077e-02  1.754e+02
gamifiedTRUE:exp_group_c                    4.371e-02  3.078e-02  1.757e+02
gamifiedFALSE:gamified_first_c              1.335e-03  3.084e-02  1.753e+02
gamifiedTRUE:gamified_first_c               7.474e-02  3.085e-02  1.754e+02
gamifiedFALSE:exp_group_c:gamified_first_c -3.774e-02  6.175e-02  1.755e+02
gamifiedTRUE:exp_group_c:gamified_first_c  -2.206e-02  6.175e-02  1.754e+02
                                           t value Pr(>|t|)    
(Intercept)                                 24.522  < 2e-16 ***
gamifiedTRUE                                 2.779  0.00546 ** 
gamifiedFALSE:exp_group_c                    0.316  0.75264    
gamifiedTRUE:exp_group_c                     1.420  0.15738    
gamifiedFALSE:gamified_first_c               0.043  0.96553    
gamifiedTRUE:gamified_first_c                2.423  0.01640 *  
gamifiedFALSE:exp_group_c:gamified_first_c  -0.611  0.54185    
gamifiedTRUE:exp_group_c:gamified_first_c   -0.357  0.72138    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
              (Intr) gmTRUE gmfdFALSE:x__ gmfdTRUE:x__ gmfdFALSE:g__
gamifidTRUE   -0.158                                                
gmfdFALSE:x__ -0.020  0.000                                         
gmfdTRUE:x__  -0.020  0.000  0.917                                  
gmfdFALSE:g__ -0.017  0.005 -0.003        -0.004                    
gmfdTRUE:g__  -0.016  0.006 -0.004        -0.003        0.918       
gFALSE:__:_   -0.002 -0.002 -0.022        -0.021       -0.025       
gTRUE:__:__   -0.003  0.002 -0.021        -0.018       -0.025       
              gmfdTRUE:g__ gFALSE:__:
gamifidTRUE                          
gmfdFALSE:x__                        
gmfdTRUE:x__                         
gmfdFALSE:g__                        
gmfdTRUE:g__                         
gFALSE:__:_   -0.025                 
gTRUE:__:__   -0.025        0.917    
print_model_table(m_learn_rt)

Effect                                                                  Estimate     SE         df  t-value  p-value
Intercept (Control)                                                        0.484  0.020     228.75    24.52    <.001
Gamification                                                               0.017  0.006  33,473.10     2.78     .005
Type of gamification (Points-and-progress - Points)                        0.044  0.031     175.68     1.42     .157
Type of gamification on Control (Points-and-progress - Points)             0.010  0.031     175.40     0.32     .753
Block on gamification (1 - 2)                                              0.075  0.031     175.37     2.42     .016
Block on Control (2 - 1)                                                   0.001  0.031     175.29     0.04     .966
Type of gamification; block difference (Block 1 - Block 2)                -0.022  0.062     175.44    -0.36     .721
Type of gamification on Control; block difference (Block 2 - Block 1)     -0.038  0.062     175.48    -0.61     .542

Fitted values

d_model_fit <- crossing(
  gamified = c(FALSE, TRUE), 
  exp_group_c = 0, 
  gamified_first_c = 0
)

d_model_fit$model_fit <- predict(m_learn_rt,
                                 newdata = d_model_fit,
                                 re.form = NA, 
                                 type = "response") |>
  exp() # Transform logRT to RT

d_model_fit
d_model_fit <- crossing(
  gamified = c(FALSE, TRUE), 
  exp_group_c = 0, 
  gamified_first_c = sort(unique(d_learn_rt_m$gamified_first_c))
)

d_model_fit$model_fit <- predict(m_learn_rt,
                                 newdata = d_model_fit,
                                 re.form = NA, 
                                 type = "response") |>
  exp() # Transform logRT to RT

d_model_fit

Visualise fitted model

p_learn_rt_m <- plot_model_fit(m_learn_rt, d_learn_rt_m, exp_trans = TRUE, y_lab = "Response time (s)") +
  scale_y_continuous(limits = c(1.4, 1.9), labels = scales::comma_format())
  block           condition gamified gamified_first exp_group gamified_first_c
1     1             Control    FALSE          FALSE      both       -0.5311822
2     1             Control    FALSE          FALSE     score       -0.5311822
3     1              Points     TRUE           TRUE     score        0.4688178
4     1 Points-and-progress     TRUE           TRUE      both        0.4688178
5     2             Control    FALSE           TRUE      both        0.4688178
6     2             Control    FALSE           TRUE     score        0.4688178
7     2              Points     TRUE          FALSE     score       -0.5311822
8     2 Points-and-progress     TRUE          FALSE      both       -0.5311822
  exp_group_c pred_val     exp_order     type
1   0.5378079 1.647124  Control—Both  Control
2  -0.4621921 1.598827 Control—Score  Control
3  -0.4621921 1.683192 Score—Control Gamified
4   0.5378079 1.740307  Both—Control Gamified
5   0.5378079 1.616182  Both—Control  Control
6  -0.4621921 1.629135 Score—Control  Control
7  -0.4621921 1.546128 Control—Score Gamified
8   0.5378079 1.634243  Control—Both Gamified
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
p_learn_rt_m

Fit Bayesian model

Fit again with brms so that we can calculate Bayes Factors. Because we expect any fixed effects to be at most moderate in size, we will use a weakly informative Normal(0, .1) prior for these effects.
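
To get some intuition for this scale (an illustrative aside): on the log scale a coefficient \(\beta\) multiplies RT by \(e^\beta\), so effects of about two prior standard deviations correspond to roughly a 20% change in RT:

exp(c(-0.2, 0.2)) # RT multipliers at +/- 2 prior SDs: ~0.82 and ~1.22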

m_learn_rt_brms <- brm(log_rt ~ gamified +
                         gamified:exp_group_c +
                         gamified:gamified_first_c +
                         gamified:gamified_first_c:exp_group_c +
                         (1 | subject) + (1 | fact),
                       family = "gaussian",
                       data = d_learn_rt_m,
                       prior = set_prior("normal(0, .1)", class = "b"),
                       chains = 4,
                       iter = 11000,
                       warmup = 1000,
                       sample_prior = TRUE,
                       future = TRUE,
                       seed = 0)
Compiling Stan program...
Start sampling

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.00168 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 16.8 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:     1 / 11000 [  0%]  (Warmup)
Chain 1: Iteration:  1001 / 11000 [  9%]  (Sampling)
Chain 1: Iteration:  2100 / 11000 [ 19%]  (Sampling)
Chain 1: Iteration:  3200 / 11000 [ 29%]  (Sampling)
Chain 1: Iteration:  4300 / 11000 [ 39%]  (Sampling)
Chain 1: Iteration:  5400 / 11000 [ 49%]  (Sampling)
Chain 1: Iteration:  6500 / 11000 [ 59%]  (Sampling)
Chain 1: Iteration:  7600 / 11000 [ 69%]  (Sampling)
Chain 1: Iteration:  8700 / 11000 [ 79%]  (Sampling)
Chain 1: Iteration:  9800 / 11000 [ 89%]  (Sampling)
Chain 1: Iteration: 10900 / 11000 [ 99%]  (Sampling)
Chain 1: Iteration: 11000 / 11000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 63.096 seconds (Warm-up)
Chain 1:                387.589 seconds (Sampling)
Chain 1:                450.685 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.001877 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 18.77 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:     1 / 11000 [  0%]  (Warmup)
Chain 2: Iteration:  1001 / 11000 [  9%]  (Sampling)
Chain 2: Iteration:  2100 / 11000 [ 19%]  (Sampling)
Chain 2: Iteration:  3200 / 11000 [ 29%]  (Sampling)
Chain 2: Iteration:  4300 / 11000 [ 39%]  (Sampling)
Chain 2: Iteration:  5400 / 11000 [ 49%]  (Sampling)
Chain 2: Iteration:  6500 / 11000 [ 59%]  (Sampling)
Chain 2: Iteration:  7600 / 11000 [ 69%]  (Sampling)
Chain 2: Iteration:  8700 / 11000 [ 79%]  (Sampling)
Chain 2: Iteration:  9800 / 11000 [ 89%]  (Sampling)
Chain 2: Iteration: 10900 / 11000 [ 99%]  (Sampling)
Chain 2: Iteration: 11000 / 11000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 70.219 seconds (Warm-up)
Chain 2:                379.585 seconds (Sampling)
Chain 2:                449.804 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 0.002091 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 20.91 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:     1 / 11000 [  0%]  (Warmup)
Chain 3: Iteration:  1001 / 11000 [  9%]  (Sampling)
Chain 3: Iteration:  2100 / 11000 [ 19%]  (Sampling)
Chain 3: Iteration:  3200 / 11000 [ 29%]  (Sampling)
Chain 3: Iteration:  4300 / 11000 [ 39%]  (Sampling)
Chain 3: Iteration:  5400 / 11000 [ 49%]  (Sampling)
Chain 3: Iteration:  6500 / 11000 [ 59%]  (Sampling)
Chain 3: Iteration:  7600 / 11000 [ 69%]  (Sampling)
Chain 3: Iteration:  8700 / 11000 [ 79%]  (Sampling)
Chain 3: Iteration:  9800 / 11000 [ 89%]  (Sampling)
Chain 3: Iteration: 10900 / 11000 [ 99%]  (Sampling)
Chain 3: Iteration: 11000 / 11000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 60.968 seconds (Warm-up)
Chain 3:                361.703 seconds (Sampling)
Chain 3:                422.671 seconds (Total)
Chain 3: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 0.001896 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 18.96 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:     1 / 11000 [  0%]  (Warmup)
Chain 4: Iteration:  1001 / 11000 [  9%]  (Sampling)
Chain 4: Iteration:  2100 / 11000 [ 19%]  (Sampling)
Chain 4: Iteration:  3200 / 11000 [ 29%]  (Sampling)
Chain 4: Iteration:  4300 / 11000 [ 39%]  (Sampling)
Chain 4: Iteration:  5400 / 11000 [ 49%]  (Sampling)
Chain 4: Iteration:  6500 / 11000 [ 59%]  (Sampling)
Chain 4: Iteration:  7600 / 11000 [ 69%]  (Sampling)
Chain 4: Iteration:  8700 / 11000 [ 79%]  (Sampling)
Chain 4: Iteration:  9800 / 11000 [ 89%]  (Sampling)
Chain 4: Iteration: 10900 / 11000 [ 99%]  (Sampling)
Chain 4: Iteration: 11000 / 11000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 131.771 seconds (Warm-up)
Chain 4:                360.195 seconds (Sampling)
Chain 4:                491.966 seconds (Total)
Chain 4: 
summary(m_learn_rt_brms)
 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: log_rt ~ gamified + gamified:exp_group_c + gamified:gamified_first_c + gamified:gamified_first_c:exp_group_c + (1 | subject) + (1 | fact) 
   Data: d_learn_rt_m (Number of observations: 33657) 
  Draws: 4 chains, each with iter = 11000; warmup = 1000; thin = 1;
         total post-warmup draws = 40000

Multilevel Hyperparameters:
~fact (Number of levels: 78) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.11      0.01     0.09     0.13 1.00     8718    16292

~subject (Number of levels: 166) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.19      0.01     0.17     0.21 1.00     7137    14437

Regression Coefficients:
                                           Estimate Est.Error l-95% CI u-95% CI
Intercept                                      0.48      0.02     0.44     0.52
gamifiedTRUE                                   0.02      0.01     0.01     0.03
gamifiedFALSE:exp_group_c                      0.01      0.03    -0.05     0.06
gamifiedTRUE:exp_group_c                       0.04      0.03    -0.02     0.10
gamifiedFALSE:gamified_first_c                -0.01      0.03    -0.06     0.05
gamifiedTRUE:gamified_first_c                  0.07      0.03     0.01     0.12
gamifiedFALSE:exp_group_c:gamified_first_c    -0.02      0.05    -0.12     0.07
gamifiedTRUE:exp_group_c:gamified_first_c     -0.01      0.05    -0.10     0.09
                                           Rhat Bulk_ESS Tail_ESS
Intercept                                  1.00     4838     9834
gamifiedTRUE                               1.00   101540    27412
gamifiedFALSE:exp_group_c                  1.00     5027    10328
gamifiedTRUE:exp_group_c                   1.00     4972    10050
gamifiedFALSE:gamified_first_c             1.00     4675     9141
gamifiedTRUE:gamified_first_c              1.00     4765     9731
gamifiedFALSE:exp_group_c:gamified_first_c 1.00     6758    11635
gamifiedTRUE:exp_group_c:gamified_first_c  1.00     6701    12077

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.57      0.00     0.56     0.57 1.00    88321    27944

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

Inspect the posterior sample distributions of the fixed effects:

plot(m_learn_rt_brms, nvariables = 8, variable = "^b_", regex = TRUE)

Bayes factors

Test the hypothesis that each fixed-effect coefficient in the model (both main effects and interactions) is equal to zero. The column Evid.Ratio shows the Bayes Factor in favour of the null hypothesis (\(BF_{01}\)).

h_learn_rt <- hypothesis(m_learn_rt_brms,
                         c("gamifiedTRUE = 0",
                           "gamifiedFALSE:exp_group_c = 0",
                           "gamifiedTRUE:exp_group_c = 0",
                           "gamifiedFALSE:gamified_first_c = 0",
                           "gamifiedTRUE:gamified_first_c = 0",
                           "gamifiedFALSE:exp_group_c:gamified_first_c = 0",
                           "gamifiedTRUE:exp_group_c:gamified_first_c = 0"),
                         class = "b")


h_learn_rt$hypothesis |>
  mutate(BF10 = 1 / Evid.Ratio,
         evidence_for_null = sapply(Evid.Ratio, bf_to_strength))

This hypothesis test calculates the Savage-Dickey density ratio at zero: the ratio of the posterior density at zero to the prior density at zero (indicated by the dashed vertical line). Values above 1 indicate a stronger belief, after observing the data, that the effect is indeed zero.

sd_ratio_rt <- plot(h_learn_rt, nvariables = 8, variable = "^b_", regex = TRUE, plot = FALSE)

sd_ratio_rt[[1]] +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey25")

Fitted values

d_model_fit <- crossing(
  gamified = c(FALSE, TRUE), 
  exp_group_c = 0, 
  gamified_first_c = sort(unique(d_learn_rt_m$gamified_first_c))
)

d_model_fit$model_fit <- predict(m_learn_rt_brms,
                                 newdata = d_model_fit,
                                 re_formula = NA,
                                 type = "response")[,1] |> # [,1]: posterior mean (Estimate)
  exp() # Transform logRT to RT

d_model_fit

Conclusion

The Bayesian model finds anecdotal to strong evidence in favour of the null hypothesis (no effect on correct RT) for each of the regression coefficients.

Total score

The total score is the number of points after the last trial in a block.

Prepare data

d_learn_score <- d_learn |>
  group_by(subject, exp_group, block, condition, gamified, gamified_first) |>
  slice(n()) # keep only the last trial in each block

d_learn_score_agg <- d_learn_score |>
  group_by(block, condition, gamified, gamified_first, exp_group) |>
  summarise(feedback_score_mean = mean(feedback_score, na.rm = T),
            feedback_score_se = sd(feedback_score, na.rm = T)/sqrt(n())) |>
  ungroup() |>
  add_experiment_cols()
`summarise()` has grouped output by 'block', 'condition', 'gamified',
'gamified_first'. You can override using the `.groups` argument.

Visualise data

p_learn_score <- plot_data(d_learn_score_agg, feedback_score_mean, feedback_score_se, "Total score") +
  scale_y_continuous(limits = c(1000, 1400), labels = scales::comma_format())

p_learn_score

Distribution of scores:

p_learn_score_dist <- ggplot(d_learn_score, aes(x = feedback_score, fill = condition)) +
  facet_grid(condition ~ .) +
  geom_histogram(aes(y = after_stat(density)), colour = "black", binwidth = 100) +
  geom_density(alpha = .5) +
  geom_vline(xintercept = c(1200, 1500), lty = 2) +
  scale_fill_manual(values = col_condition) +
  scale_colour_manual(values = col_condition) +
  guides(fill = "none",
         colour = "none") +
  labs(x = "Total score",
       y = "Density") +
  theme_paper

p_learn_score_dist

ggsave(here("output", "practice_scores.png"), width = 7.5, height = 5)

Fit frequentist model

Prepare data for modelling by mean-centering categorical predictors:

d_learn_score_m <- d_learn_score |>
  ungroup() |>
  mutate(exp_group_c = ifelse(exp_group == "score", 0, 1),
         exp_group_c = exp_group_c - mean(exp_group_c),
         gamified_first_c = gamified_first - mean(gamified_first))
m_learn_score <- lmer(feedback_score ~ gamified +
                       gamified:exp_group_c +
                       gamified:gamified_first_c +
                       gamified:gamified_first_c:exp_group_c +
                       (1 | subject),
                     data = d_learn_score_m)

summary(m_learn_score)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
feedback_score ~ gamified + gamified:exp_group_c + gamified:gamified_first_c +  
    gamified:gamified_first_c:exp_group_c + (1 | subject)
   Data: d_learn_score_m

REML criterion at convergence: 4534.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.48749 -0.54087 -0.03256  0.49006  2.49651 

Random effects:
 Groups   Name        Variance Std.Dev.
 subject  (Intercept) 67352    259.5   
 Residual             25509    159.7   
Number of obs: 332, groups:  subject, 166

Fixed effects:
                                           Estimate Std. Error       df t value
(Intercept)                                1245.256     23.652  212.312  52.649
gamifiedTRUE                                 -4.856     17.531  162.000  -0.277
gamifiedFALSE:exp_group_c                   -72.770     47.359  212.312  -1.537
gamifiedTRUE:exp_group_c                    -72.704     47.359  212.312  -1.535
gamifiedFALSE:gamified_first_c                7.425     47.473  212.312   0.156
gamifiedTRUE:gamified_first_c              -123.533     47.473  212.312  -2.602
gamifiedFALSE:exp_group_c:gamified_first_c  -74.168     95.059  212.312  -0.780
gamifiedTRUE:exp_group_c:gamified_first_c   -85.754     95.059  212.312  -0.902
                                           Pr(>|t|)    
(Intercept)                                 < 2e-16 ***
gamifiedTRUE                                0.78215    
gamifiedFALSE:exp_group_c                   0.12589    
gamifiedTRUE:exp_group_c                    0.12623    
gamifiedFALSE:gamified_first_c              0.87586    
gamifiedTRUE:gamified_first_c               0.00992 ** 
gamifiedFALSE:exp_group_c:gamified_first_c  0.43613    
gamifiedTRUE:exp_group_c:gamified_first_c   0.36802    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
              (Intr) gmTRUE gmfdFALSE:x__ gmfdTRUE:x__ gmfdFALSE:g__
gamifidTRUE   -0.371                                                
gmfdFALSE:x__  0.000  0.000                                         
gmfdTRUE:x__   0.000  0.000  0.725                                  
gmfdFALSE:g__  0.000  0.000 -0.004        -0.003                    
gmfdTRUE:g__   0.000  0.000 -0.003        -0.004        0.725       
gFALSE:__:_   -0.004  0.002  0.000         0.000        0.001       
gTRUE:__:__   -0.003 -0.002  0.000         0.000        0.001       
              gmfdTRUE:g__ gFALSE:__:
gamifidTRUE                          
gmfdFALSE:x__                        
gmfdTRUE:x__                         
gmfdFALSE:g__                        
gmfdTRUE:g__                         
gFALSE:__:_    0.001                 
gTRUE:__:__    0.001        0.725    
print_model_table(m_learn_score)

Effect                                                                   Estimate      SE      df  t-value  p-value
Intercept (Control)                                                     1,245.256  23.652  212.31    52.65    <.001
Gamification                                                               -4.856  17.531  162.00    -0.28     .782
Type of gamification (Points-and-progress - Points)                       -72.704  47.359  212.31    -1.54     .126
Type of gamification on Control (Points-and-progress - Points)            -72.770  47.359  212.31    -1.54     .126
Block on gamification (1 - 2)                                            -123.533  47.473  212.31    -2.60     .010
Block on Control (2 - 1)                                                    7.425  47.473  212.31     0.16     .876
Type of gamification; block difference (Block 1 - Block 2)                -85.754  95.059  212.31    -0.90     .368
Type of gamification on Control; block difference (Block 2 - Block 1)     -74.168  95.059  212.31    -0.78     .436

Fitted values

d_model_fit <- crossing(
  gamified = TRUE, 
  exp_group_c = 0,
  gamified_first_c = sort(unique(d_learn_score_m$gamified_first_c))
)

d_model_fit$model_fit <- predict(m_learn_score,
                                 newdata = d_model_fit,
                                 re.form = NA, 
                                 type = "response")

d_model_fit

Fit Bayesian model

Since the total score is on a much larger scale than the other dependent variables, we expect the fixed-effect coefficients to be on a much larger scale too. Adjust the prior accordingly, to a weakly informative Normal(0, 100):
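
As a rough calibration of the prior scale (an illustrative check), we can look directly at the spread of the observed scores:

sd(d_learn_score_m$feedback_score, na.rm = TRUE)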

m_learn_score_bayes <- brm(feedback_score ~ gamified +
                             gamified:exp_group_c +
                             gamified:gamified_first_c +
                             gamified:gamified_first_c:exp_group_c +
                             (1 | subject),
                           family = "gaussian",
                           data = d_learn_score_m,
                           prior = set_prior("normal(0, 100)", class = "b"),
                           chains = 4,
                           iter = 11000,
                           warmup = 1000,
                           sample_prior = TRUE,
                           future = TRUE,
                           seed = 0)
Compiling Stan program...
Start sampling

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000108 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.08 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:     1 / 11000 [  0%]  (Warmup)
Chain 1: Iteration:  1001 / 11000 [  9%]  (Sampling)
Chain 1: Iteration:  2100 / 11000 [ 19%]  (Sampling)
Chain 1: Iteration:  3200 / 11000 [ 29%]  (Sampling)
Chain 1: Iteration:  4300 / 11000 [ 39%]  (Sampling)
Chain 1: Iteration:  5400 / 11000 [ 49%]  (Sampling)
Chain 1: Iteration:  6500 / 11000 [ 59%]  (Sampling)
Chain 1: Iteration:  7600 / 11000 [ 69%]  (Sampling)
Chain 1: Iteration:  8700 / 11000 [ 79%]  (Sampling)
Chain 1: Iteration:  9800 / 11000 [ 89%]  (Sampling)
Chain 1: Iteration: 10900 / 11000 [ 99%]  (Sampling)
Chain 1: Iteration: 11000 / 11000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 0.667 seconds (Warm-up)
Chain 1:                2.107 seconds (Sampling)
Chain 1:                2.774 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.000101 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.01 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:     1 / 11000 [  0%]  (Warmup)
Chain 2: Iteration:  1001 / 11000 [  9%]  (Sampling)
Chain 2: Iteration:  2100 / 11000 [ 19%]  (Sampling)
Chain 2: Iteration:  3200 / 11000 [ 29%]  (Sampling)
Chain 2: Iteration:  4300 / 11000 [ 39%]  (Sampling)
Chain 2: Iteration:  5400 / 11000 [ 49%]  (Sampling)
Chain 2: Iteration:  6500 / 11000 [ 59%]  (Sampling)
Chain 2: Iteration:  7600 / 11000 [ 69%]  (Sampling)
Chain 2: Iteration:  8700 / 11000 [ 79%]  (Sampling)
Chain 2: Iteration:  9800 / 11000 [ 89%]  (Sampling)
Chain 2: Iteration: 10900 / 11000 [ 99%]  (Sampling)
Chain 2: Iteration: 11000 / 11000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 0.664 seconds (Warm-up)
Chain 2:                2.106 seconds (Sampling)
Chain 2:                2.77 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 0.000107 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 1.07 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:     1 / 11000 [  0%]  (Warmup)
Chain 3: Iteration:  1001 / 11000 [  9%]  (Sampling)
Chain 3: Iteration:  2100 / 11000 [ 19%]  (Sampling)
Chain 3: Iteration:  3200 / 11000 [ 29%]  (Sampling)
Chain 3: Iteration:  4300 / 11000 [ 39%]  (Sampling)
Chain 3: Iteration:  5400 / 11000 [ 49%]  (Sampling)
Chain 3: Iteration:  6500 / 11000 [ 59%]  (Sampling)
Chain 3: Iteration:  7600 / 11000 [ 69%]  (Sampling)
Chain 3: Iteration:  8700 / 11000 [ 79%]  (Sampling)
Chain 3: Iteration:  9800 / 11000 [ 89%]  (Sampling)
Chain 3: Iteration: 10900 / 11000 [ 99%]  (Sampling)
Chain 3: Iteration: 11000 / 11000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 0.727 seconds (Warm-up)
Chain 3:                2.108 seconds (Sampling)
Chain 3:                2.835 seconds (Total)
Chain 3: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 9.9e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.99 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:     1 / 11000 [  0%]  (Warmup)
Chain 4: Iteration:  1001 / 11000 [  9%]  (Sampling)
Chain 4: Iteration:  2100 / 11000 [ 19%]  (Sampling)
Chain 4: Iteration:  3200 / 11000 [ 29%]  (Sampling)
Chain 4: Iteration:  4300 / 11000 [ 39%]  (Sampling)
Chain 4: Iteration:  5400 / 11000 [ 49%]  (Sampling)
Chain 4: Iteration:  6500 / 11000 [ 59%]  (Sampling)
Chain 4: Iteration:  7600 / 11000 [ 69%]  (Sampling)
Chain 4: Iteration:  8700 / 11000 [ 79%]  (Sampling)
Chain 4: Iteration:  9800 / 11000 [ 89%]  (Sampling)
Chain 4: Iteration: 10900 / 11000 [ 99%]  (Sampling)
Chain 4: Iteration: 11000 / 11000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 0.7 seconds (Warm-up)
Chain 4:                2.12 seconds (Sampling)
Chain 4:                2.82 seconds (Total)
Chain 4: 
summary(m_learn_score_bayes)
 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: feedback_score ~ gamified + gamified:exp_group_c + gamified:gamified_first_c + gamified:gamified_first_c:exp_group_c + (1 | subject) 
   Data: d_learn_score_m (Number of observations: 332) 
  Draws: 4 chains, each with iter = 11000; warmup = 1000; thin = 1;
         total post-warmup draws = 40000

Multilevel Hyperparameters:
~subject (Number of levels: 166) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)   259.54     17.49   227.10   295.57 1.00    11131    20038

Regression Coefficients:
                                           Estimate Est.Error l-95% CI u-95% CI
Intercept                                   1245.28     23.67  1198.74  1291.38
gamifiedTRUE                                  -4.70     17.40   -38.90    29.50
gamifiedFALSE:exp_group_c                    -51.79     40.99  -133.20    28.87
gamifiedTRUE:exp_group_c                     -51.82     41.01  -132.72    29.14
gamifiedFALSE:gamified_first_c                19.51     41.27   -61.15   100.19
gamifiedTRUE:gamified_first_c               -103.71     41.42  -184.36   -21.94
gamifiedFALSE:exp_group_c:gamified_first_c   -26.03     63.82  -151.67    98.47
gamifiedTRUE:exp_group_c:gamified_first_c    -35.52     64.01  -160.77    90.96
                                           Rhat Bulk_ESS Tail_ESS
Intercept                                  1.00    12041    20700
gamifiedTRUE                               1.00    85749    28038
gamifiedFALSE:exp_group_c                  1.00    13502    22943
gamifiedTRUE:exp_group_c                   1.00    13959    22394
gamifiedFALSE:gamified_first_c             1.00    13528    22841
gamifiedTRUE:gamified_first_c              1.00    13690    22803
gamifiedFALSE:exp_group_c:gamified_first_c 1.00    23052    28888
gamifiedTRUE:exp_group_c:gamified_first_c  1.00    22372    28974

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma   160.88      9.02   144.43   179.62 1.00    18231    26019

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

Inspect the posterior sample distributions of the fixed effects:

plot(m_learn_score_bayes, nvariables = 8, variable = "^b_", regex = TRUE)

Bayes factors

Test the hypothesis that each fixed-effect coefficient in the model (both main effects and interactions) is equal to zero. The column Evid.Ratio shows the Bayes Factor in favour of the null hypothesis (\(BF_{01}\)).

h_learn_score <- hypothesis(m_learn_score_bayes,
                            c("gamifiedTRUE = 0",
                              "gamifiedFALSE:exp_group_c = 0",
                              "gamifiedTRUE:exp_group_c = 0",
                              "gamifiedFALSE:gamified_first_c = 0",
                              "gamifiedTRUE:gamified_first_c = 0",
                              "gamifiedFALSE:exp_group_c:gamified_first_c = 0",
                              "gamifiedTRUE:exp_group_c:gamified_first_c = 0"),
                            class = "b")


h_learn_score$hypothesis |>
  mutate(BF10 = 1 / Evid.Ratio,
         evidence_for_null = sapply(Evid.Ratio, bf_to_strength))

This hypothesis test calculates the Savage-Dickey density ratio at zero: the ratio of the posterior density at zero to the prior density at zero (indicated by the dashed vertical line in the plot below). Values above 1 indicate a stronger belief, after observing the data, that the effect is indeed zero.

sd_ratio_score <- plot(h_learn_score, nvariables = 8, variable = "^b_", regex = TRUE, plot = FALSE)

sd_ratio_score[[1]] +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey25")
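
The same ratio can be approximated directly from the posterior draws. A minimal sketch (illustrative, not part of the original analysis; it assumes the m_learn_score_bayes fit above with its normal(0, 100) prior on the fixed effects):

post_draws <- as_draws_df(m_learn_score_bayes)$b_gamifiedTRUE
post_dens <- density(post_draws)                # kernel density estimate of the posterior
post_at_zero <- approx(post_dens$x, post_dens$y, xout = 0)$y
prior_at_zero <- dnorm(0, mean = 0, sd = 100)   # density of the normal(0, 100) prior at zero
post_at_zero / prior_at_zero                    # approximates Evid.Ratio (BF01) for this coefficient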

Fitted values

d_model_fit <- crossing(
  gamified = TRUE, 
  exp_group_c = 0,
  gamified_first_c = sort(unique(d_learn_score_m$gamified_first_c))
)

d_model_fit$model_fit <- predict(m_learn_score_bayes,
                                 newdata = d_model_fit,
                                 re_formula = NA,
                                 type = "response")[,1]

d_model_fit

Number of words practiced

Prepare data

d_learn_words <- d_learn |>
  group_by(subject, exp_group, block, condition, gamified, gamified_first) |>
  summarise(words_seen = n_distinct(fact))
`summarise()` has grouped output by 'subject', 'exp_group', 'block',
'condition', 'gamified'. You can override using the `.groups` argument.
d_learn_words_agg <- d_learn_words |>
  group_by(block, condition, gamified, gamified_first, exp_group) |>
  summarise(words_mean = mean(words_seen, na.rm = T),
            words_se = sd(words_seen, na.rm = T)/sqrt(n())) |>
  ungroup() |>
  add_experiment_cols()
`summarise()` has grouped output by 'block', 'condition', 'gamified',
'gamified_first'. You can override using the `.groups` argument.

Visualise data

p_learn_words <- plot_data(d_learn_words_agg, words_mean, words_se, "Words practiced") +
  scale_y_continuous(limits = c(20, 30))

p_learn_words

Fit frequentist model

Prepare data for modelling by mean-centering categorical predictors:

d_learn_words_m <- d_learn_words |>
  ungroup() |>
  mutate(exp_group_c = ifelse(exp_group == "score", 0, 1),
         exp_group_c = exp_group_c - mean(exp_group_c),
         gamified_first_c = gamified_first - mean(gamified_first))
m_learn_words <- lmer(words_seen ~ gamified +
                       gamified:exp_group_c +
                       gamified:gamified_first_c +
                       gamified:gamified_first_c:exp_group_c +
                       (1 | subject),
                     data = d_learn_words_m)

summary(m_learn_words)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
words_seen ~ gamified + gamified:exp_group_c + gamified:gamified_first_c +  
    gamified:gamified_first_c:exp_group_c + (1 | subject)
   Data: d_learn_words_m

REML criterion at convergence: 2201.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.22686 -0.49906  0.05179  0.50386  2.35412 

Random effects:
 Groups   Name        Variance Std.Dev.
 subject  (Intercept) 57.91    7.610   
 Residual             17.11    4.137   
Number of obs: 332, groups:  subject, 166

Fixed effects:
                                             Estimate Std. Error         df
(Intercept)                                 25.725008   0.672290 203.027802
gamifiedTRUE                                 0.002888   0.454075 162.000006
gamifiedFALSE:exp_group_c                   -1.017908   1.346145 203.027801
gamifiedTRUE:exp_group_c                    -1.655928   1.346145 203.027801
gamifiedFALSE:gamified_first_c               0.317483   1.349388 203.027802
gamifiedTRUE:gamified_first_c               -3.005740   1.349388 203.027802
gamifiedFALSE:exp_group_c:gamified_first_c  -2.082412   2.702006 203.027802
gamifiedTRUE:exp_group_c:gamified_first_c   -4.924329   2.702006 203.027802
                                           t value Pr(>|t|)    
(Intercept)                                 38.265   <2e-16 ***
gamifiedTRUE                                 0.006   0.9949    
gamifiedFALSE:exp_group_c                   -0.756   0.4504    
gamifiedTRUE:exp_group_c                    -1.230   0.2201    
gamifiedFALSE:gamified_first_c               0.235   0.8142    
gamifiedTRUE:gamified_first_c               -2.227   0.0270 *  
gamifiedFALSE:exp_group_c:gamified_first_c  -0.771   0.4418    
gamifiedTRUE:exp_group_c:gamified_first_c   -1.822   0.0699 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
              (Intr) gmTRUE gmfdFALSE:x__ gmfdTRUE:x__ gmfdFALSE:g__
gamifidTRUE   -0.338                                                
gmfdFALSE:x__  0.000  0.000                                         
gmfdTRUE:x__   0.000  0.000  0.772                                  
gmfdFALSE:g__  0.000  0.000 -0.004        -0.003                    
gmfdTRUE:g__   0.000  0.000 -0.003        -0.004        0.772       
gFALSE:__:_   -0.004  0.001  0.000         0.000        0.001       
gTRUE:__:__   -0.003 -0.001  0.000         0.000        0.001       
              gmfdTRUE:g__ gFALSE:__:
gamifidTRUE                          
gmfdFALSE:x__                        
gmfdTRUE:x__                         
gmfdFALSE:g__                        
gmfdTRUE:g__                         
gFALSE:__:_    0.001                 
gTRUE:__:__    0.001        0.772    
print_model_table(m_learn_words)

Effect                                                                  Estimate     SE      df  t-value  p-value
Intercept (Control)                                                       25.725  0.672  203.03    38.26    <.001
Gamification                                                               0.003  0.454  162.00     0.01     .995
Type of gamification (Points-and-progress - Points)                       -1.656  1.346  203.03    -1.23     .220
Type of gamification on Control (Points-and-progress - Points)            -1.018  1.346  203.03    -0.76     .450
Block on gamification (1 - 2)                                             -3.006  1.349  203.03    -2.23     .027
Block on Control (2 - 1)                                                   0.317  1.349  203.03     0.24     .814
Type of gamification; block difference (Block 1 - Block 2)                -4.924  2.702  203.03    -1.82     .070
Type of gamification on Control; block difference (Block 2 - Block 1)    -2.082  2.702  203.03    -0.77     .442

Fitted values

d_model_fit <- crossing(
  gamified = TRUE, 
  exp_group_c = 0,
  gamified_first_c = sort(unique(d_learn_words_m$gamified_first_c))
)

d_model_fit$model_fit <- predict(m_learn_words,
                                 newdata = d_model_fit,
                                 re.form = NA, 
                                 type = "response")

d_model_fit
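
These fitted values can be reproduced by hand: with exp_group_c = 0, only three fixed effects contribute. A quick check (illustrative) for gamified blocks played first (gamified_first_c ≈ 0.458):

cf <- fixef(m_learn_words)
# 25.725 + 0.003 + 0.458 * -3.006 ≈ 24.35 words practiced
unname(cf["(Intercept)"] + cf["gamifiedTRUE"] + 0.458 * cf["gamifiedTRUE:gamified_first_c"])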

Visualise fitted model

p_learn_words_m <- plot_model_fit(m_learn_words, d_learn_words_m, y_lab = "Words practiced") +
  scale_y_continuous(limits = c(20, 30))
  block           condition gamified gamified_first exp_group gamified_first_c
1     1             Control    FALSE          FALSE      both       -0.5421687
2     1             Control    FALSE          FALSE     score       -0.5421687
3     1              Points     TRUE           TRUE     score        0.4578313
4     1 Points-and-progress     TRUE           TRUE      both        0.4578313
5     2             Control    FALSE           TRUE      both        0.4578313
6     2             Control    FALSE           TRUE     score        0.4578313
7     2              Points     TRUE          FALSE     score       -0.5421687
8     2 Points-and-progress     TRUE          FALSE      both       -0.5421687
  exp_group_c pred_val     exp_order     type
1   0.5240964 25.61111  Control—Both  Control
2  -0.4759036 25.50000 Control—Score  Control
3  -0.4759036 26.21277 Score—Control Gamified
4   0.5240964 22.30233  Both—Control Gamified
5   0.5240964 24.83721  Both—Control  Control
6  -0.4759036 26.80851 Score—Control  Control
7  -0.4759036 26.87500 Control—Score Gamified
8   0.5240964 27.88889  Control—Both Gamified
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
p_learn_words_m

Fit Bayesian model

As before, we adjust the prior to match the scale of the dependent variable, which here calls for a wider range of likely coefficient values than in the accuracy and RT models.
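
A rough scale check (illustrative, not part of the original script): the variance components above imply an overall spread of roughly nine words, and condition effects are plausibly only a fraction of that, so a normal(0, 3) prior on the fixed effects is wide enough.

sd(d_learn_words_m$words_seen)  # overall SD of words practiced, ≈ 9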

m_learn_words_bayes <- brm(words_seen ~ gamified +
                           gamified:exp_group_c +
                           gamified:gamified_first_c +
                           gamified:gamified_first_c:exp_group_c +
                           (1 | subject),
                         family = "gaussian",
                         data = d_learn_words_m,
                         prior = set_prior("normal(0, 3)", class = "b"),
                         chains = 4,
                         iter = 11000,
                         warmup = 1000,
                         sample_prior = TRUE,
                         future = TRUE,
                         seed = 0)
Compiling Stan program...
Trying to compile a simple C file
[Toolchain test compilation failed with the known harmless "'cmath' file not found" error; sampling proceeded normally.]
Start sampling
[Sampling progress for chains 1-4 omitted: each chain ran 11,000 iterations (1,000 warmup) in about 2.4 seconds.]
summary(m_learn_words_bayes)
 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: words_seen ~ gamified + gamified:exp_group_c + gamified:gamified_first_c + gamified:gamified_first_c:exp_group_c + (1 | subject) 
   Data: d_learn_words_m (Number of observations: 332) 
  Draws: 4 chains, each with iter = 11000; warmup = 1000; thin = 1;
         total post-warmup draws = 40000

Multilevel Hyperparameters:
~subject (Number of levels: 166) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     7.63      0.49     6.72     8.63 1.00     9656    17460

Regression Coefficients:
                                           Estimate Est.Error l-95% CI u-95% CI
Intercept                                     25.72      0.68    24.40    27.05
gamifiedTRUE                                   0.00      0.45    -0.89     0.89
gamifiedFALSE:exp_group_c                     -0.68      1.17    -2.95     1.64
gamifiedTRUE:exp_group_c                      -1.29      1.18    -3.59     1.05
gamifiedFALSE:gamified_first_c                 0.60      1.17    -1.71     2.90
gamifiedTRUE:gamified_first_c                 -2.57      1.18    -4.89    -0.26
gamifiedFALSE:exp_group_c:gamified_first_c    -0.26      1.84    -3.87     3.33
gamifiedTRUE:exp_group_c:gamified_first_c     -2.65      1.84    -6.24     0.94
                                           Rhat Bulk_ESS Tail_ESS
Intercept                                  1.00     8243    14913
gamifiedTRUE                               1.00    66510    28768
gamifiedFALSE:exp_group_c                  1.00    10130    17922
gamifiedTRUE:exp_group_c                   1.00     9760    17595
gamifiedFALSE:gamified_first_c             1.00    10217    19276
gamifiedTRUE:gamified_first_c              1.00    10360    18651
gamifiedFALSE:exp_group_c:gamified_first_c 1.00    15914    25086
gamifiedTRUE:exp_group_c:gamified_first_c  1.00    15977    26780

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     4.17      0.23     3.74     4.66 1.00    18196    28114

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

Inspect the posterior sample distributions of the fixed effects:

plot(m_learn_words_bayes, nvariables = 8, variable = "^b_", regex = TRUE)

Bayes factors

Do a hypothesis test for all fixed-effect coefficients (both main effects and interactions) in the model being equal to zero. The column Evid.Ratio shows the Bayes Factor in favour of the null hypothesis (\(BF_{01}\)).

h_learn_words <- hypothesis(m_learn_words_bayes,
                            c("gamifiedTRUE = 0",
                              "gamifiedFALSE:exp_group_c = 0",
                              "gamifiedTRUE:exp_group_c = 0",
                              "gamifiedFALSE:gamified_first_c = 0",
                              "gamifiedTRUE:gamified_first_c = 0",
                              "gamifiedFALSE:exp_group_c:gamified_first_c = 0",
                              "gamifiedTRUE:exp_group_c:gamified_first_c = 0"),
                            class = "b")


h_learn_words$hypothesis |>
  mutate(BF10 = 1 / Evid.Ratio,
         evidence_for_null = sapply(Evid.Ratio, bf_to_strength))

This hypothesis test calculates the Savage-Dickey density ratio at zero: the ratio of the posterior density at zero to the prior density at zero (indicated by the dashed vertical line in the plot below). Values above 1 indicate a stronger belief, after observing the data, that the effect is indeed zero.

sd_ratio_words <- plot(h_learn_words, nvariables = 8, variable = "^b_", regex = TRUE, plot = FALSE)

sd_ratio_words[[1]] +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey25")

Completed trials

Prepare data

d_learn_trials <- d_learn |>
  group_by(subject, exp_group, block, condition, gamified, gamified_first) |>
  summarise(n_trials = n())
`summarise()` has grouped output by 'subject', 'exp_group', 'block',
'condition', 'gamified'. You can override using the `.groups` argument.
d_learn_trials_agg <- d_learn_trials |>
  group_by(block, condition, gamified, gamified_first, exp_group) |>
  summarise(trials_mean = mean(n_trials, na.rm = T),
            trials_se = sd(n_trials, na.rm = T)/sqrt(n())) |>
  ungroup() |>
  add_experiment_cols()
`summarise()` has grouped output by 'block', 'condition', 'gamified',
'gamified_first'. You can override using the `.groups` argument.

Visualise data

p_learn_trials <- plot_data(d_learn_trials_agg, trials_mean, trials_se, "Completed trials") +
  scale_y_continuous(limits = c(130, 170))

p_learn_trials

Fit frequentist model

Prepare data for modelling by mean-centering categorical predictors:

d_learn_trials_m <- d_learn_trials |>
  ungroup() |>
  mutate(exp_group_c = ifelse(exp_group == "score", 0, 1),
         exp_group_c = exp_group_c - mean(exp_group_c),
         gamified_first_c = gamified_first - mean(gamified_first))
m_learn_trials <- lmer(n_trials ~ gamified +
                       gamified:exp_group_c +
                       gamified:gamified_first_c +
                       gamified:gamified_first_c:exp_group_c +
                       (1 | subject),
                     data = d_learn_trials_m)

summary(m_learn_trials)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
n_trials ~ gamified + gamified:exp_group_c + gamified:gamified_first_c +  
    gamified:gamified_first_c:exp_group_c + (1 | subject)
   Data: d_learn_trials_m

REML criterion at convergence: 2932.7

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.34679 -0.49538 -0.03198  0.45220  2.45632 

Random effects:
 Groups   Name        Variance Std.Dev.
 subject  (Intercept) 586.6    24.22   
 Residual             156.2    12.50   
Number of obs: 332, groups:  subject, 166

Fixed effects:
                                           Estimate Std. Error       df t value
(Intercept)                                151.1115     2.1153 199.5462  71.439
gamifiedTRUE                                -0.8356     1.3717 162.0000  -0.609
gamifiedFALSE:exp_group_c                   -3.6333     4.2355 199.5462  -0.858
gamifiedTRUE:exp_group_c                    -4.5968     4.2355 199.5462  -1.085
gamifiedFALSE:gamified_first_c              -0.6598     4.2457 199.5462  -0.155
gamifiedTRUE:gamified_first_c              -12.1755     4.2457 199.5462  -2.868
gamifiedFALSE:exp_group_c:gamified_first_c  -2.9971     8.5015 199.5462  -0.353
gamifiedTRUE:exp_group_c:gamified_first_c   -4.7077     8.5015 199.5462  -0.554
                                           Pr(>|t|)    
(Intercept)                                 < 2e-16 ***
gamifiedTRUE                                0.54326    
gamifiedFALSE:exp_group_c                   0.39202    
gamifiedTRUE:exp_group_c                    0.27909    
gamifiedFALSE:gamified_first_c              0.87665    
gamifiedTRUE:gamified_first_c               0.00458 ** 
gamifiedFALSE:exp_group_c:gamified_first_c  0.72481    
gamifiedTRUE:exp_group_c:gamified_first_c   0.58037    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
              (Intr) gmTRUE gmfdFALSE:x__ gmfdTRUE:x__ gmfdFALSE:g__
gamifidTRUE   -0.324                                                
gmfdFALSE:x__  0.000  0.000                                         
gmfdTRUE:x__   0.000  0.000  0.790                                  
gmfdFALSE:g__  0.000  0.000 -0.004        -0.003                    
gmfdTRUE:g__   0.000  0.000 -0.003        -0.004        0.790       
gFALSE:__:_   -0.004  0.001  0.000         0.000        0.001       
gTRUE:__:__   -0.003 -0.001  0.000         0.000        0.001       
              gmfdTRUE:g__ gFALSE:__:
gamifidTRUE                          
gmfdFALSE:x__                        
gmfdTRUE:x__                         
gmfdFALSE:g__                        
gmfdTRUE:g__                         
gFALSE:__:_    0.001                 
gTRUE:__:__    0.001        0.790    
print_model_table(m_learn_trials)

Effect                                                                  Estimate     SE      df  t-value  p-value
Intercept (Control)                                                      151.111  2.115  199.55    71.44    <.001
Gamification                                                              -0.836  1.372  162.00    -0.61     .543
Type of gamification (Points-and-progress - Points)                       -4.597  4.235  199.55    -1.09     .279
Type of gamification on Control (Points-and-progress - Points)            -3.633  4.235  199.55    -0.86     .392
Block on gamification (1 - 2)                                            -12.175  4.246  199.55    -2.87     .005
Block on Control (2 - 1)                                                  -0.660  4.246  199.55    -0.16     .877
Type of gamification; block difference (Block 1 - Block 2)                -4.708  8.501  199.55    -0.55     .580
Type of gamification on Control; block difference (Block 2 - Block 1)    -2.997  8.501  199.55    -0.35     .725

Fitted values

d_model_fit <- crossing(
  gamified = TRUE, 
  exp_group_c = 0,
  gamified_first_c = sort(unique(d_learn_trials_m$gamified_first_c))
)

d_model_fit$model_fit <- predict(m_learn_trials,
                                 newdata = d_model_fit,
                                 re.form = NA, 
                                 type = "response")

d_model_fit

Visualise fitted model

p_learn_trials_m <- plot_model_fit(m_learn_trials, d_learn_trials_m, y_lab = "Trials completed") +
  scale_y_continuous(limits = c(130, 170))
  block           condition gamified gamified_first exp_group gamified_first_c
1     1             Control    FALSE          FALSE      both       -0.5421687
2     1             Control    FALSE          FALSE     score       -0.5421687
3     1              Points     TRUE           TRUE     score        0.4578313
4     1 Points-and-progress     TRUE           TRUE      both        0.4578313
5     2             Control    FALSE           TRUE      both        0.4578313
6     2             Control    FALSE           TRUE     score        0.4578313
7     2              Points     TRUE          FALSE     score       -0.5421687
8     2 Points-and-progress     TRUE          FALSE      both       -0.5421687
  exp_group_c pred_val     exp_order     type
1   0.5240964 150.4167  Control—Both  Control
2  -0.4759036 152.4250 Control—Score  Control
3  -0.4759036 147.9149 Score—Control Gamified
4   0.5240964 141.1628  Both—Control Gamified
5   0.5240964 148.1860  Both—Control  Control
6  -0.4759036 153.1915 Score—Control  Control
7  -0.4759036 157.8500 Control—Score Gamified
8   0.5240964 155.8056  Control—Both Gamified
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
p_learn_trials_m

Fit Bayesian model
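
As before, we scale the prior on the fixed effects to the dependent variable: completed trials vary on the order of tens of trials, so we use a normal(0, 10) prior.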

m_learn_trials_bayes <- brm(n_trials ~ gamified +
                           gamified:exp_group_c +
                           gamified:gamified_first_c +
                           gamified:gamified_first_c:exp_group_c +
                           (1 | subject),
                         family = "gaussian",
                         data = d_learn_trials_m,
                         prior = set_prior("normal(0, 10)", class = "b"),
                         chains = 4,
                         iter = 11000,
                         warmup = 1000,
                         sample_prior = TRUE,
                         future = TRUE,
                         seed = 0)
Compiling Stan program...
Trying to compile a simple C file
[Toolchain test compilation failed with the known harmless "'cmath' file not found" error; sampling proceeded normally.]
Start sampling
[Sampling progress for chains 1-4 omitted: each chain ran 11,000 iterations (1,000 warmup) in roughly 2.4-2.7 seconds.]
summary(m_learn_trials_bayes)
 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: n_trials ~ gamified + gamified:exp_group_c + gamified:gamified_first_c + gamified:gamified_first_c:exp_group_c + (1 | subject) 
   Data: d_learn_trials_m (Number of observations: 332) 
  Draws: 4 chains, each with iter = 11000; warmup = 1000; thin = 1;
         total post-warmup draws = 40000

Multilevel Hyperparameters:
~subject (Number of levels: 166) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)    24.23      1.55    21.36    27.44 1.00     9301    17644

Regression Coefficients:
                                           Estimate Est.Error l-95% CI u-95% CI
Intercept                                    151.10      2.13   146.91   155.30
gamifiedTRUE                                  -0.81      1.37    -3.53     1.90
gamifiedFALSE:exp_group_c                     -2.69      3.71    -9.90     4.58
gamifiedTRUE:exp_group_c                      -3.62      3.72   -10.90     3.65
gamifiedFALSE:gamified_first_c                 0.73      3.73    -6.64     8.05
gamifiedTRUE:gamified_first_c                -10.34      3.73   -17.69    -2.96
gamifiedFALSE:exp_group_c:gamified_first_c    -0.95      5.92   -12.59    10.69
gamifiedTRUE:exp_group_c:gamified_first_c     -2.43      5.91   -13.96     9.08
                                           Rhat Bulk_ESS Tail_ESS
Intercept                                  1.00     6797    13747
gamifiedTRUE                               1.00    68590    27470
gamifiedFALSE:exp_group_c                  1.00     8772    17479
gamifiedTRUE:exp_group_c                   1.00     8995    17343
gamifiedFALSE:gamified_first_c             1.00     8900    16298
gamifiedTRUE:gamified_first_c              1.00     8886    16215
gamifiedFALSE:exp_group_c:gamified_first_c 1.00    13869    21955
gamifiedTRUE:exp_group_c:gamified_first_c  1.00    13753    21771

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma    12.59      0.71    11.30    14.08 1.00    18091    25582

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

Inspect the posterior sample distributions of the fixed effects:

plot(m_learn_trials_bayes, nvariables = 8, variable = "^b_", regex = TRUE)

Bayes factors

Do a hypothesis test for all fixed-effect coefficients (both main effects and interactions) in the model being equal to zero. The column Evid.Ratio shows the Bayes Factor in favour of the null hypothesis (\(BF_{01}\)).

h_learn_trials <- hypothesis(m_learn_trials_bayes,
                             c("gamifiedTRUE = 0",
                               "gamifiedFALSE:exp_group_c = 0",
                               "gamifiedTRUE:exp_group_c = 0",
                               "gamifiedFALSE:gamified_first_c = 0",
                               "gamifiedTRUE:gamified_first_c = 0",
                               "gamifiedFALSE:exp_group_c:gamified_first_c = 0",
                               "gamifiedTRUE:exp_group_c:gamified_first_c = 0"),
                             class = "b")


h_learn_trials$hypothesis |>
  mutate(BF10 = 1 / Evid.Ratio,
         evidence_for_null = sapply(Evid.Ratio, bf_to_strength))

This hypothesis test calculates the Savage-Dickey density ratio at zero: the ratio of the posterior density at zero to the prior density at zero (indicated by the dashed vertical line in the plot below). Values above 1 indicate a stronger belief, after observing the data, that the effect is indeed zero.

sd_ratio_trials <- plot(h_learn_trials, nvariables = 8, variable = "^b_", regex = TRUE, plot = FALSE)

sd_ratio_trials[[1]] +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey25")

Conclusions

  • Gamified feedback had no effect on response accuracy during practice. Response accuracy in the Control condition was slightly higher for participants in the Points group than for those in the Progress bar group, possibly reflecting pre-existing group differences that persisted despite random assignment.
  • Correct responses were slower during practice with gamified feedback than in the control condition, particularly so in Block 1.
  • There were no overall effects of the gamification manipulation on total score, number of words practiced, or number of trials completed. However, in the gamified conditions, these outcomes were all better in Block 2 than in Block 1.

Combined plot

((p_learn_acc | p_learn_rt) / (p_learn_trials | p_learn_words | p_learn_score)) +
  plot_layout(guides = "collect") +
  plot_annotation(tag_levels = "a")

ggsave(here("output", "practice_performance_all.png"), width = 7.5, height = 5)

Streamlined version:

(p_learn_acc | p_learn_rt | p_learn_score) +
  plot_layout(guides = "collect") +
  plot_annotation(tag_levels = "a")

ggsave(here("output", "practice_performance.png"), width = 7.5, height = 3)

Save hypothesis testing output for visualisation

fwrite(h_learn_acc$hypothesis, here("output", "hypothesis_tests", "h_learn_acc.csv"))
fwrite(h_learn_rt$hypothesis, here("output", "hypothesis_tests", "h_learn_rt.csv"))
fwrite(h_learn_score$hypothesis, here("output", "hypothesis_tests", "h_learn_score.csv"))
fwrite(h_learn_words$hypothesis, here("output", "hypothesis_tests", "h_learn_words.csv"))
fwrite(h_learn_trials$hypothesis, here("output", "hypothesis_tests", "h_learn_trials.csv"))
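
For downstream visualisation these files can be read back in one step; a minimal sketch (hypothetical usage; data.table's fread/rbindlist are already available via the visualisation functions):

h_files <- list.files(here("output", "hypothesis_tests"), full.names = TRUE)
h_all <- rbindlist(setNames(lapply(h_files, fread), basename(h_files)), idcol = "source")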

Session info

sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Sonoma 14.5

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Amsterdam
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] flextable_0.9.6   data.table_1.14.8 future_1.33.0     brms_2.21.0      
 [5] Rcpp_1.0.11       lmerTest_3.1-3    lme4_1.1-34       Matrix_1.6-1.1   
 [9] tidyr_1.3.0       stringr_1.5.0     patchwork_1.3.0   scales_1.3.0     
[13] ggplot2_3.5.0     dplyr_1.1.3       here_1.0.1       

loaded via a namespace (and not attached):
  [1] gridExtra_2.3           inline_0.3.19           sandwich_3.0-2         
  [4] rlang_1.1.1             magrittr_2.0.3          multcomp_1.4-25        
  [7] matrixStats_1.3.0       compiler_4.3.1          loo_2.7.0              
 [10] reshape2_1.4.4          systemfonts_1.0.4       callr_3.7.3            
 [13] vctrs_0.6.3             httpcode_0.3.0          pkgconfig_2.0.3        
 [16] crayon_1.5.2            fastmap_1.1.1           ellipsis_0.3.2         
 [19] backports_1.4.1         labeling_0.4.3          utf8_1.2.3             
 [22] promises_1.2.1          rmarkdown_2.25          ps_1.7.5               
 [25] nloptr_2.0.3            ragg_1.2.7              purrr_1.0.2            
 [28] xfun_0.40               cachem_1.0.8            jsonlite_1.8.7         
 [31] later_1.3.1             uuid_1.2-0              parallel_4.3.1         
 [34] prettyunits_1.2.0       R6_2.5.1                bslib_0.5.1            
 [37] stringi_1.7.12          StanHeaders_2.32.6      parallelly_1.36.0      
 [40] boot_1.3-28.1           jquerylib_0.1.4         numDeriv_2016.8-1.1    
 [43] estimability_1.4.1      rstan_2.32.6            knitr_1.44             
 [46] zoo_1.8-12              bayesplot_1.11.1        httpuv_1.6.12          
 [49] splines_4.3.1           tidyselect_1.2.0        rstudioapi_0.15.0      
 [52] abind_1.4-5             yaml_2.3.7              codetools_0.2-19       
 [55] curl_5.1.0              processx_3.8.2          listenv_0.9.0          
 [58] pkgbuild_1.4.2          plyr_1.8.9              lattice_0.21-9         
 [61] tibble_3.2.1            shiny_1.8.0             withr_2.5.1            
 [64] bridgesampling_1.1-2    askpass_1.2.0           posterior_1.5.0        
 [67] coda_0.19-4             evaluate_0.22           survival_3.5-7         
 [70] RcppParallel_5.1.7      zip_2.3.1               xml2_1.3.5             
 [73] pillar_1.9.0            tensorA_0.36.2.1        checkmate_2.3.1        
 [76] stats4_4.3.1            distributional_0.4.0    generics_0.1.3         
 [79] rprojroot_2.0.3         rstantools_2.4.0        munsell_0.5.0          
 [82] minqa_1.2.6             globals_0.16.2          xtable_1.8-4           
 [85] glue_1.6.2              gdtools_0.3.7           emmeans_1.8.9          
 [88] tools_4.3.1             gfonts_0.2.0            mvtnorm_1.2-3          
 [91] grid_4.3.1              QuickJSR_1.1.3          colorspace_2.1-0       
 [94] nlme_3.1-163            cli_3.6.1               textshaping_0.3.7      
 [97] officer_0.6.6           fontBitstreamVera_0.1.1 fansi_1.0.4            
[100] Brobdingnag_1.2-9       V8_4.3.3                gtable_0.3.4           
[103] sass_0.4.7              digest_0.6.33           fontquiver_0.2.1       
[106] crul_1.4.2              TH.data_1.1-2           farver_2.1.1           
[109] htmltools_0.5.6         lifecycle_1.0.3         mime_0.12              
[112] openssl_2.1.1           fontLiberation_0.1.0    MASS_7.3-60            
---
title: "Analysis: learning session" 
subtitle: "Gamified Feedback in Adaptive Retrieval Practice"
author: "Maarten van der Velde & Gesa van den Broek"
date: "Last updated: `r Sys.Date()`"
output:
  html_notebook:
    smart: no
    toc: yes
    toc_float: yes
  github_document:
    toc: yes
editor_options: 
  chunk_output_type: inline
---

```{r setup, include = FALSE}
# Remove to disable caching
knitr::opts_chunk$set(cache = TRUE)
```

# Setup

```{r}
library(here)
library(dplyr)
library(ggplot2)
library(scales)
library(patchwork)
library(stringr)
library(tidyr)
library(lme4)
library(lmerTest)
library(brms)

# Set up parallel processing for Bayesian models
library(future)
plan(multisession, workers = 4)
```

Helper functions for plots and tables:
```{r}
source(here("scripts", "00_visualisation_functions.R"))
```

Load processed data:
```{r}
d_learn <- readRDS(here("data", "processed", "d_learn.rds"))
```

```{r}
add_experiment_cols <- function (data) {
  data |>
    mutate(exp_order = case_when(
      gamified_first == 0 & exp_group == "score" ~ "Control—Score",
      gamified_first == 0 & exp_group == "both" ~ "Control—Both",
      gamified_first == 1 & exp_group == "score" ~ "Score—Control",
      gamified_first == 1 & exp_group == "both" ~ "Both—Control"
    )) |>
    mutate(type = ifelse(gamified, "Gamified", "Control"))
}
```

Helper function for interpreting Bayes factors:
```{r}
bf_to_strength <- function (bf) {
  
  direction <- "for"
  
  if (bf < 1) {
    bf <- 1/bf
    direction <- "against"
  }
  
  strength <- case_when(
    bf == 1 ~ "No",
    bf < 3 ~ "Anecdotal",
    bf < 10 ~ "Moderate",
    bf < 30 ~ "Strong",
    bf < 100 ~ "Very strong",
    TRUE ~ "Extreme"
  )
  
  paste0(strength, " evidence ", direction)
}
```

# Does gamification change performance during practice?

## Accuracy

### Prepare data
```{r}
d_learn_acc <- d_learn |>
  filter(!study_trial) |>
  group_by(subject, exp_group, block, condition, gamified, gamified_first) |>
  summarise(accuracy = mean(correct))

d_learn_acc_agg <- d_learn_acc |>
  group_by(block, condition, gamified, gamified_first, exp_group) |>
  summarise(acc = mean(accuracy, na.rm = T),
            acc_se = sd(accuracy, na.rm = T)/sqrt(n())) |>
  ungroup() |>
  add_experiment_cols()
```

### Visualise data

```{r}
p_learn_acc <- plot_data(d_learn_acc_agg, acc, acc_se, "Accuracy") +
  scale_y_continuous(limits = c(.725, .875), labels = scales::percent_format())

p_learn_acc
```

### Fit frequentist model

Prepare data for modelling by mean-centering categorical predictors:
```{r}
d_learn_m <- d_learn |>
  filter(!study_trial) |>
  mutate(exp_group_c = ifelse(exp_group == "score", 0, 1),
         exp_group_c = exp_group_c - mean(exp_group_c),
         gamified_first_c = gamified_first - mean(gamified_first))
```

```{r}
m_learn_acc <- glmer(correct ~ gamified +
                       gamified:exp_group_c +
                       gamified:gamified_first_c +
                       gamified:gamified_first_c:exp_group_c +
                       (1 | subject) + (1 | fact),
                     family = "binomial",
                     data = d_learn_m)

summary(m_learn_acc)
print_model_table(m_learn_acc)
```


#### Fitted values

```{r}
d_model_fit <- crossing(
  gamified = FALSE, 
  exp_group_c = sort(unique(d_learn_m$exp_group_c)), 
  gamified_first_c = 0
)

d_model_fit$model_fit <- predict(m_learn_acc,
                                 newdata = d_model_fit,
                                 re.form = NA, 
                                 type = "response")

d_model_fit
```



#### Visualise fitted model

```{r}
p_learn_acc_m <- plot_model_fit(m_learn_acc, d_learn_m, y_lab = "Accuracy") +
  scale_y_continuous(limits = c(.75, .90), labels = scales::percent_format(accuracy = .1))

p_learn_acc_m
```
### Fit Bayesian model

Fit again with `brms` so that we can calculate Bayes Factors.
Because we expect any fixed effects to be at most moderate in size, we will use a weakly informative Normal(0, 1) prior for these effects.
```{r}
m_learn_acc_brms <- brm(correct ~ gamified +
                          gamified:exp_group_c +
                          gamified:gamified_first_c +
                          gamified:gamified_first_c:exp_group_c +
                          (1 | subject) + (1 | fact),
                        family = "bernoulli",
                        data = d_learn_m,
                        prior = set_prior("normal(0, 1)", class = "b"),
                        chains = 4,
                        iter = 11000,
                        warmup = 1000,
                        sample_prior = TRUE,
                        future = TRUE,
                        seed = 0)

summary(m_learn_acc_brms)
```

Inspect the posterior sample distributions of the fixed effects:
```{r, fig.height = 12, fig.width = 8}
plot(m_learn_acc_brms, nvariables = 8, variable = "^b_", regex = TRUE)
```

#### Bayes factors

Do a hypothesis test for all fixed-effect coefficients (both main effects and interactions) in the model being equal to zero.
The column `Evid.Ratio` shows the Bayes Factor in favour of the null hypothesis ($BF_{01}$).
```{r}
h_learn_acc <- hypothesis(m_learn_acc_brms,
                          c("gamifiedTRUE = 0",
                            "gamifiedFALSE:exp_group_c = 0",
                            "gamifiedTRUE:exp_group_c = 0",
                            "gamifiedFALSE:gamified_first_c = 0",
                            "gamifiedTRUE:gamified_first_c = 0",
                            "gamifiedFALSE:exp_group_c:gamified_first_c = 0",
                            "gamifiedTRUE:exp_group_c:gamified_first_c = 0"),
                          class = "b")

h_learn_acc$hypothesis |>
  mutate(BF10 = 1 / Evid.Ratio,
         evidence_for_null = sapply(Evid.Ratio, bf_to_strength))
```

This hypothesis test calculates the Savage-Dickey density ratio at zero: the ratio of the posterior density at zero to the prior density at zero (indicated by the dashed vertical line in the plot below).
Values above 1 indicate a stronger belief, after observing the data, that the effect is indeed zero.
```{r, fig.height = 12, fig.width = 6}
sd_ratio_acc <- plot(h_learn_acc, nvariables = 8, variable = "^b_", regex = TRUE, plot = FALSE)

sd_ratio_acc[[1]] +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey25")
```



#### Conclusion

The Bayesian model finds anecdotal to strong evidence in favour of the null hypothesis (no effect on learning accuracy) for each of the regression coefficients.

## Response time

Response time on correct answers only.

### Prepare data

To keep the visualisation of average response times by condition simple, we calculate the median RT per participant, and then take the mean and SD of these medians (which are themselves roughly normally distributed).
```{r}
d_learn_rt <- d_learn |>
  filter(!study_trial) |>
  filter(correct) |>
  mutate(rt = rt / 1000) |>
  group_by(subject, exp_group, block, condition, gamified, gamified_first) |>
  summarise(rt_median = median(rt, na.rm = TRUE))

d_learn_rt_agg <- d_learn_rt |>
  group_by(block, condition, gamified, gamified_first, exp_group) |>
  summarise(rt_mean = mean(rt_median, na.rm = T),
            rt_se = sd(rt_median, na.rm = T)/sqrt(n())) |>
  ungroup() |>
  add_experiment_cols()
```

### Visualise data

```{r}
p_learn_rt <- plot_data(d_learn_rt_agg, rt_mean, rt_se, "Response time (s)") +
  scale_y_continuous(limits = c(1.3, 1.8), labels = scales::comma_format())

p_learn_rt
```


### Fit frequentist model

Since RT data is not normally distributed, we fit a lognormal model to the response times. 
(See https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#gamma-glmms .)
Prepare data for modelling by mean-centering categorical predictors:
```{r}
d_learn_rt_m <- d_learn |>
  filter(!study_trial) |>
  filter(correct) |>
  mutate(log_rt = log(rt / 1000)) |>
  mutate(exp_group_c = ifelse(exp_group == "score", 0, 1),
         exp_group_c = exp_group_c - mean(exp_group_c),
         gamified_first_c = gamified_first - mean(gamified_first)
         )
```


```{r}
m_learn_rt <- lmer(log_rt ~ gamified +
                      gamified:exp_group_c +
                      gamified:gamified_first_c +
                      gamified:gamified_first_c:exp_group_c +
                      (1 | subject) + (1 | fact),
                    data = d_learn_rt_m)

summary(m_learn_rt)
print_model_table(m_learn_rt)
```


#### Fitted values
```{r}
d_model_fit <- crossing(
  gamified = c(FALSE, TRUE), 
  exp_group_c = 0, 
  gamified_first_c = 0
)

d_model_fit$model_fit <- predict(m_learn_rt,
                                 newdata = d_model_fit,
                                 re.form = NA, 
                                 type = "response") |>
  exp() # Transform logRT to RT

d_model_fit
```

```{r}
d_model_fit <- crossing(
  gamified = c(FALSE, TRUE), 
  exp_group_c = 0, 
  gamified_first_c = sort(unique(d_learn_rt_m$gamified_first_c))
)

d_model_fit$model_fit <- predict(m_learn_rt,
                                 newdata = d_model_fit,
                                 re.form = NA, 
                                 type = "response") |>
  exp() # Transform logRT to RT

d_model_fit
```

### Visualise fitted model

```{r}
p_learn_rt_m <- plot_model_fit(m_learn_rt, d_learn_rt_m, exp_trans = TRUE, y_lab = "Response time (s)") +
  scale_y_continuous(limits = c(1.4, 1.9), labels = scales::comma_format())

p_learn_rt_m
```
### Fit Bayesian model

Fit again with `brms` so that we can calculate Bayes Factors.
Because we expect any fixed effects to be at most moderate in size, we will use a weakly informative Normal(0, .1) prior for these effects.
```{r}
m_learn_rt_brms <- brm(log_rt ~ gamified +
                         gamified:exp_group_c +
                         gamified:gamified_first_c +
                         gamified:gamified_first_c:exp_group_c +
                         (1 | subject) + (1 | fact),
                       family = "gaussian",
                       data = d_learn_rt_m,
                       prior = set_prior("normal(0, .1)", class = "b"),
                       chains = 4,
                       iter = 11000,
                       warmup = 1000,
                       sample_prior = TRUE,
                       future = TRUE,
                       seed = 0)

summary(m_learn_rt_brms)
```

Inspect the posterior sample distributions of the fixed effects:
```{r, fig.height = 12, fig.width = 8}
plot(m_learn_rt_brms, nvariables = 8, variable = "^b_", regex = TRUE)
```


#### Bayes factors

Do a hypothesis test for all fixed-effect coefficients (both main effects and interactions) in the model being equal to zero.
The column `Evid.Ratio` shows the Bayes Factor in favour of the null hypothesis ($BF_{01}$).
```{r}
h_learn_rt <- hypothesis(m_learn_rt_brms,
                         c("gamifiedTRUE = 0",
                           "gamifiedFALSE:exp_group_c = 0",
                           "gamifiedTRUE:exp_group_c = 0",
                           "gamifiedFALSE:gamified_first_c = 0",
                           "gamifiedTRUE:gamified_first_c = 0",
                           "gamifiedFALSE:exp_group_c:gamified_first_c = 0",
                           "gamifiedTRUE:exp_group_c:gamified_first_c = 0"),
                         class = "b")


h_learn_rt$hypothesis |>
  mutate(BF10 = 1 / Evid.Ratio,
         evidence_for_null = sapply(Evid.Ratio, bf_to_strength))
```

This hypothesis test calculates the Savage-Dickey density ratio at zero: the ratio of the posterior density at zero to the prior density at zero (indicated by the dashed vertical line in the plot below).
Values above 1 indicate a stronger belief, after observing the data, that the effect is indeed zero.
```{r, fig.height = 12, fig.width = 6}
sd_ratio_rt <- plot(h_learn_rt, nvariables = 8, variable = "^b_", regex = TRUE, plot = FALSE)

sd_ratio_rt[[1]] +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey25")
```

#### Fitted values
```{r}
d_model_fit <- crossing(
  gamified = c(FALSE, TRUE), 
  exp_group_c = 0, 
  gamified_first_c = sort(unique(d_learn_rt_m$gamified_first_c))
)

d_model_fit$model_fit <- predict(m_learn_rt_brms,
                                 newdata = d_model_fit,
                                 re_formula = NA,
                                 type = "response")[,1] |>
  exp() # Transform logRT to RT

d_model_fit
```


#### Conclusion
The Bayesian model finds anecdotal to strong evidence in favour of the null hypothesis (no effect on correct RT) for each of the regression coefficients.

## Total score

The total score is the number of points after the last trial in a block.

### Prepare data
```{r}
d_learn_score <- d_learn |>
  group_by(subject, exp_group, block, condition, gamified, gamified_first) |>
  slice(n())

d_learn_score_agg <- d_learn_score |>
  group_by(block, condition, gamified, gamified_first, exp_group) |>
  summarise(feedback_score_mean = mean(feedback_score, na.rm = T),
            feedback_score_se = sd(feedback_score, na.rm = T)/sqrt(n())) |>
  ungroup() |>
  add_experiment_cols()
```

### Visualise data

```{r}
p_learn_score <- plot_data(d_learn_score_agg, feedback_score_mean, feedback_score_se, "Total score") +
  scale_y_continuous(limits = c(1000, 1400), labels = scales::comma_format())

p_learn_score
```


Distribution of scores:
```{r}
p_learn_score_dist <- ggplot(d_learn_score, aes(x = feedback_score, fill = condition)) +
  facet_grid(condition ~ .) +
  geom_histogram(aes(y = after_stat(density)), colour = "black", binwidth = 100) +
  geom_density(alpha = .5) +
  geom_vline(xintercept = c(1200, 1500), lty = 2) +
  scale_fill_manual(values = col_condition) +
  scale_colour_manual(values = col_condition) +
  guides(fill = "none",
         colour = "none") +
  labs(x = "Total score",
       y = "Density") +
  theme_paper

p_learn_score_dist

ggsave(here("output", "practice_scores.png"), width = 7.5, height = 5)
```


### Fit frequentist model

Prepare data for modelling by mean-centering categorical predictors:
```{r}
d_learn_score_m <- d_learn_score |>
  ungroup() |>
  mutate(exp_group_c = ifelse(exp_group == "score", 0, 1),
         exp_group_c = exp_group_c - mean(exp_group_c),
         gamified_first_c = gamified_first - mean(gamified_first))
```


```{r}
m_learn_score <- lmer(feedback_score ~ gamified +
                       gamified:exp_group_c +
                       gamified:gamified_first_c +
                       gamified:gamified_first_c:exp_group_c +
                       (1 | subject),
                     data = d_learn_score_m)

summary(m_learn_score)
print_model_table(m_learn_score)
```


#### Fitted values

```{r}
d_model_fit <- crossing(
  gamified = TRUE, 
  exp_group_c = 0,
  gamified_first_c = sort(unique(d_learn_score_m$gamified_first_c))
)

d_model_fit$model_fit <- predict(m_learn_score,
                                 newdata = d_model_fit,
                                 re.form = NA, # Ignore random effects
                                 type = "response")

d_model_fit
```

### Fit Bayesian model

Since the total score is on a much larger scale than the other dependent variables, we expect the fixed-effect coefficients to be on a much larger scale too.
Adjust the prior accordingly:
```{r}
m_learn_score_bayes <- brm(feedback_score ~ gamified +
                             gamified:exp_group_c +
                             gamified:gamified_first_c +
                             gamified:gamified_first_c:exp_group_c +
                             (1 | subject),
                           family = "gaussian",
                           data = d_learn_score_m,
                           prior = set_prior("normal(0, 100)", class = "b"),
                           chains = 4,
                           iter = 11000,
                           warmup = 1000,
                           sample_prior = TRUE,
                           future = TRUE,
                           seed = 0)

summary(m_learn_score_bayes)
```
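
As a quick sanity check on this choice (illustrative, not part of the original pipeline), the normal(0, 100) prior places about 95% of its mass on effects between roughly -200 and +200 points, which can be compared against the spread of the observed scores:
```{r}
# Central 95% interval implied by the normal(0, 100) prior on the coefficients
qnorm(c(.025, .975), mean = 0, sd = 100)

# For comparison: the observed spread of total scores
sd(d_learn_score_m$feedback_score, na.rm = TRUE)
```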

Inspect the posterior sample distributions of the fixed effects:
```{r, fig.height = 12, fig.width = 8}
plot(m_learn_score_bayes, nvariables = 8, variable = "^b_", regex = TRUE)
```


#### Bayes factors

Test, for each fixed-effect coefficient in the model (both main effects and interactions), the hypothesis that it is equal to zero.
The column `Evid.Ratio` shows the Bayes factor in favour of the null hypothesis ($BF_{01}$).
```{r}
h_learn_score <- hypothesis(m_learn_score_bayes,
                            c("gamifiedTRUE = 0",
                              "gamifiedFALSE:exp_group_c = 0",
                              "gamifiedTRUE:exp_group_c = 0",
                              "gamifiedFALSE:gamified_first_c = 0",
                              "gamifiedTRUE:gamified_first_c = 0",
                              "gamifiedFALSE:exp_group_c:gamified_first_c = 0",
                              "gamifiedTRUE:exp_group_c:gamified_first_c = 0"),
                            class = "b")


h_learn_score$hypothesis |>
  mutate(BF10 = 1 / Evid.Ratio,
         evidence_for_null = sapply(Evid.Ratio, bf_to_strength))
```

This hypothesis test calculates the Savage-Dickey density ratio at zero: the ratio of the posterior density to the prior density, both evaluated at zero (indicated by the dashed vertical line).
Values above 1 indicate a strengthened belief that the effect is indeed zero after having observed the data.
```{r, fig.height = 12, fig.width = 6}
sd_ratio_score <- plot(h_learn_score, nvariables = 8, variable = "^b_", regex = TRUE, plot = FALSE)

sd_ratio_score[[1]] +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey25")
```

#### Fitted values

```{r}
d_model_fit <- crossing(
  gamified = TRUE, 
  exp_group_c = 0,
  gamified_first_c = sort(unique(d_learn_score_m$gamified_first_c))
)

# Population-level predictions (re_formula = NA); [, 1] is the Estimate column
d_model_fit$model_fit <- predict(m_learn_score_bayes,
                                 newdata = d_model_fit,
                                 re_formula = NA)[, 1]

d_model_fit
```



## Number of words practiced

### Prepare data
```{r}
d_learn_words <- d_learn |>
  group_by(subject, exp_group, block, condition, gamified, gamified_first) |>
  summarise(words_seen = n_distinct(fact)) # Count unique words practiced

d_learn_words_agg <- d_learn_words |>
  group_by(block, condition, gamified, gamified_first, exp_group) |>
  summarise(words_mean = mean(words_seen, na.rm = T),
            words_se = sd(words_seen, na.rm = T)/sqrt(n())) |>
  ungroup() |>
  add_experiment_cols()
```

### Visualise data

```{r}
p_learn_words <- plot_data(d_learn_words_agg, words_mean, words_se, "Words practiced") +
  scale_y_continuous(limits = c(20, 30))

p_learn_words
```


### Fit frequentist model

Prepare data for modelling by mean-centering categorical predictors:
```{r}
d_learn_words_m <- d_learn_words |>
  ungroup() |>
  mutate(exp_group_c = ifelse(exp_group == "score", 0, 1),
         exp_group_c = exp_group_c - mean(exp_group_c),
         gamified_first_c = gamified_first - mean(gamified_first))
```


```{r}
m_learn_words <- lmer(words_seen ~ gamified +
                       gamified:exp_group_c +
                       gamified:gamified_first_c +
                       gamified:gamified_first_c:exp_group_c +
                       (1 | subject),
                     data = d_learn_words_m)

summary(m_learn_words)
print_model_table(m_learn_words)
```


#### Fitted values
```{r}
d_model_fit <- crossing(
  gamified = TRUE, 
  exp_group_c = 0,
  gamified_first_c = sort(unique(d_learn_words_m$gamified_first_c))
)

d_model_fit$model_fit <- predict(m_learn_words,
                                 newdata = d_model_fit,
                                 re.form = NA, 
                                 type = "response")

d_model_fit
```


#### Visualise fitted model
```{r}
p_learn_words_m <- plot_model_fit(m_learn_words, d_learn_words_m, y_lab = "Words practiced") +
  scale_y_continuous(limits = c(20, 30))

p_learn_words_m
```

### Fit Bayesian model

As before, we adjust the prior on the fixed effects to match the scale of the dependent variable.
```{r}
m_learn_words_bayes <- brm(words_seen ~ gamified +
                           gamified:exp_group_c +
                           gamified:gamified_first_c +
                           gamified:gamified_first_c:exp_group_c +
                           (1 | subject),
                         family = "gaussian",
                         data = d_learn_words_m,
                         prior = set_prior("normal(0, 3)", class = "b"),
                         chains = 4,
                         iter = 11000,
                         warmup = 1000,
                         sample_prior = TRUE,
                         future = TRUE,
                         seed = 0)

summary(m_learn_words_bayes)
```
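
A more direct (but computationally heavier) way to vet such a prior is a prior predictive check. The sketch below, an illustrative aside rather than part of the original pipeline, refits the model while sampling from the priors only and overlays simulated outcomes on the observed distribution of `words_seen`:
```{r}
# Sketch: prior predictive check. sample_prior = "only" ignores the likelihood,
# so pp_check compares outcomes implied by the priors alone with the data.
m_learn_words_prior <- update(m_learn_words_bayes,
                              sample_prior = "only",
                              seed = 0)

pp_check(m_learn_words_prior, ndraws = 50)
```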

Inspect the posterior sample distributions of the fixed effects:
```{r, fig.height = 12, fig.width = 8}
plot(m_learn_words_bayes, nvariables = 8, variable = "^b_", regex = TRUE)
```


#### Bayes factors

Test, for each fixed-effect coefficient in the model (both main effects and interactions), the hypothesis that it is equal to zero.
The column `Evid.Ratio` shows the Bayes factor in favour of the null hypothesis ($BF_{01}$).
```{r}
h_learn_words <- hypothesis(m_learn_words_bayes,
                            c("gamifiedTRUE = 0",
                              "gamifiedFALSE:exp_group_c = 0",
                              "gamifiedTRUE:exp_group_c = 0",
                              "gamifiedFALSE:gamified_first_c = 0",
                              "gamifiedTRUE:gamified_first_c = 0",
                              "gamifiedFALSE:exp_group_c:gamified_first_c = 0",
                              "gamifiedTRUE:exp_group_c:gamified_first_c = 0"),
                            class = "b")


h_learn_words$hypothesis |>
  mutate(BF10 = 1 / Evid.Ratio,
         evidence_for_null = sapply(Evid.Ratio, bf_to_strength))
```

This hypothesis test calculates the Savage-Dickey density ratio at zero: the ratio of the posterior density to the prior density, both evaluated at zero (indicated by the dashed vertical line).
Values above 1 indicate a strengthened belief that the effect is indeed zero after having observed the data.
```{r, fig.height = 12, fig.width = 6}
sd_ratio_words <- plot(h_learn_words, nvariables = 8, variable = "^b_", regex = TRUE, plot = FALSE)

sd_ratio_words[[1]] +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey25")
```

## Completed trials

### Prepare data
```{r}
d_learn_trials <- d_learn |>
  group_by(subject, exp_group, block, condition, gamified, gamified_first) |>
  summarise(n_trials = n())

d_learn_trials_agg <- d_learn_trials |>
  group_by(block, condition, gamified, gamified_first, exp_group) |>
  summarise(trials_mean = mean(n_trials, na.rm = T),
            trials_se = sd(n_trials, na.rm = T)/sqrt(n())) |>
  ungroup() |>
  add_experiment_cols()
```

### Visualise data

```{r}
p_learn_trials <- plot_data(d_learn_trials_agg, trials_mean, trials_se, "Completed trials") +
  scale_y_continuous(limits = c(130, 170))

p_learn_trials
```



### Fit frequentist model

Prepare data for modelling by mean-centering categorical predictors:
```{r}
d_learn_trials_m <- d_learn_trials |>
  ungroup() |>
  mutate(exp_group_c = ifelse(exp_group == "score", 0, 1),
         exp_group_c = exp_group_c - mean(exp_group_c),
         gamified_first_c = gamified_first - mean(gamified_first))
```


```{r}
m_learn_trials <- lmer(n_trials ~ gamified +
                       gamified:exp_group_c +
                       gamified:gamified_first_c +
                       gamified:gamified_first_c:exp_group_c +
                       (1 | subject),
                     data = d_learn_trials_m)

summary(m_learn_trials)
print_model_table(m_learn_trials)
```


#### Fitted values
```{r}
d_model_fit <- crossing(
  gamified = TRUE, 
  exp_group_c = 0,
  gamified_first_c = sort(unique(d_learn_trials_m$gamified_first_c))
)

d_model_fit$model_fit <- predict(m_learn_trials,
                                 newdata = d_model_fit,
                                 re.form = NA, 
                                 type = "response")

d_model_fit
```


#### Visualise fitted model
```{r}
p_learn_trials_m <- plot_model_fit(m_learn_trials, d_learn_trials_m, y_lab = "Trials completed") +
  scale_y_continuous(limits = c(130, 170))

p_learn_trials_m
```

### Fit Bayesian model

As before, the prior on the fixed effects is scaled to match the dependent variable:
```{r}
m_learn_trials_bayes <- brm(n_trials ~ gamified +
                           gamified:exp_group_c +
                           gamified:gamified_first_c +
                           gamified:gamified_first_c:exp_group_c +
                           (1 | subject),
                         family = "gaussian",
                         data = d_learn_trials_m,
                         prior = set_prior("normal(0, 10)", class = "b"),
                         chains = 4,
                         iter = 11000,
                         warmup = 1000,
                         sample_prior = TRUE,
                         future = TRUE,
                         seed = 0)

summary(m_learn_trials_bayes)
```

Inspect the posterior sample distributions of the fixed effects:
```{r, fig.height = 12, fig.width = 8}
plot(m_learn_trials_bayes, nvariables = 8, variable = "^b_", regex = TRUE)
```


#### Bayes factors

Test, for each fixed-effect coefficient in the model (both main effects and interactions), the hypothesis that it is equal to zero.
The column `Evid.Ratio` shows the Bayes factor in favour of the null hypothesis ($BF_{01}$).
```{r}
h_learn_trials <- hypothesis(m_learn_trials_bayes,
                             c("gamifiedTRUE = 0",
                               "gamifiedFALSE:exp_group_c = 0",
                               "gamifiedTRUE:exp_group_c = 0",
                               "gamifiedFALSE:gamified_first_c = 0",
                               "gamifiedTRUE:gamified_first_c = 0",
                               "gamifiedFALSE:exp_group_c:gamified_first_c = 0",
                               "gamifiedTRUE:exp_group_c:gamified_first_c = 0"),
                             class = "b")


h_learn_trials$hypothesis |>
  mutate(BF10 = 1 / Evid.Ratio,
         evidence_for_null = sapply(Evid.Ratio, bf_to_strength))
```

This hypothesis test calculates the Savage-Dickey density ratio at zero: the ratio of the posterior density to the prior density, both evaluated at zero (indicated by the dashed vertical line).
Values above 1 indicate a strengthened belief that the effect is indeed zero after having observed the data.
```{r, fig.height = 12, fig.width = 6}
sd_ratio_trials <- plot(h_learn_trials, nvariables = 8, variable = "^b_", regex = TRUE, plot = FALSE)

sd_ratio_trials[[1]] +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey25")
```



## Conclusions

- Gamified feedback had no effect on response accuracy during practice. Response accuracy in the Control condition was slightly higher for participants in the Points group than for those in the Progress bar group, possibly reflecting group differences that persisted despite random assignment.
- Correct responses were slower during practice with gamified feedback than in the control condition, particularly in Block 1.
- There were no overall effects of the gamification manipulation on total score, number of words practiced, or number of trials completed. However, in the gamified conditions, all three outcomes were better in Block 2 than in Block 1.


## Combined plot
```{r}
((p_learn_acc | p_learn_rt) / (p_learn_trials | p_learn_words | p_learn_score)) +
  plot_layout(guides = "collect") +
  plot_annotation(tag_levels = "a")

ggsave(here("output", "practice_performance_all.png"), width = 7.5, height = 5)
```

Streamlined version:
```{r}
(p_learn_acc | p_learn_rt | p_learn_score) +
  plot_layout(guides = "collect") +
  plot_annotation(tag_levels = "a")

ggsave(here("output", "practice_performance.png"), width = 7.5, height = 3)
```

# Save hypothesis testing output for visualisation
```{r}
fwrite(h_learn_acc$hypothesis, here("output", "hypothesis_tests", "h_learn_acc.csv"))
fwrite(h_learn_rt$hypothesis, here("output", "hypothesis_tests", "h_learn_rt.csv"))
fwrite(h_learn_score$hypothesis, here("output", "hypothesis_tests", "h_learn_score.csv"))
fwrite(h_learn_words$hypothesis, here("output", "hypothesis_tests", "h_learn_words.csv"))
fwrite(h_learn_trials$hypothesis, here("output", "hypothesis_tests", "h_learn_trials.csv"))
```

# Session info
```{r}
sessionInfo()
```
